{
 "cells": [
  {
   "metadata": {},
   "cell_type": "code",
   "outputs": [],
   "execution_count": null,
   "source": [
    "%env LLM_BASE_URL=https://dashscope.aliyuncs.com/compatible-mode/v1\n",
    "%env LLM_API_KEY=sk-替换为自己的Qwen API_KEY"
   ],
   "id": "4b197bac32c3d219"
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "af375836-b870-458b-87d1-4e00565977eb",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T13:34:32.299194Z",
     "iopub.status.busy": "2024-12-04T13:34:32.298714Z",
     "iopub.status.idle": "2024-12-04T13:34:32.311517Z",
     "shell.execute_reply": "2024-12-04T13:34:32.311088Z",
     "shell.execute_reply.started": "2024-12-04T13:34:32.299181Z"
    },
    "papermill": {
     "duration": 0.115454,
     "end_time": "2024-11-23T14:29:00.919641",
     "exception": false,
     "start_time": "2024-11-23T14:29:00.804187",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "%%capture --no-stderr\n",
    "!pip install -U langchain langchain_community langchain_openai pypdf sentence_transformers chromadb shutil"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "1e2c72b8-ee12-4130-af88-699998aa230c",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T13:34:32.312103Z",
     "iopub.status.busy": "2024-12-04T13:34:32.311980Z",
     "iopub.status.idle": "2024-12-04T13:34:32.527459Z",
     "shell.execute_reply": "2024-12-04T13:34:32.526913Z",
     "shell.execute_reply.started": "2024-12-04T13:34:32.312090Z"
    },
    "papermill": {
     "duration": 0.319981,
     "end_time": "2024-11-23T14:29:01.380771",
     "exception": false,
     "start_time": "2024-11-23T14:29:01.060790",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "import os\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "841d2b02-ad06-40d2-b11f-c7adccec6ca2",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T13:34:32.528083Z",
     "iopub.status.busy": "2024-12-04T13:34:32.527910Z",
     "iopub.status.idle": "2024-12-04T13:34:32.530678Z",
     "shell.execute_reply": "2024-12-04T13:34:32.530379Z",
     "shell.execute_reply.started": "2024-12-04T13:34:32.528070Z"
    },
    "papermill": {
     "duration": 0.121409,
     "end_time": "2024-11-23T14:29:01.638126",
     "exception": false,
     "start_time": "2024-11-23T14:29:01.516717",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "expr_version = 'split_01_2_markdown_header_text_split'\n",
    "\n",
    "preprocess_output_dir = os.path.join(os.path.pardir, 'outputs', 'v1_20240713')\n",
    "expr_dir = os.path.join(os.path.pardir, 'experiments', expr_version)\n",
    "\n",
    "os.makedirs(expr_dir, exist_ok=True)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "cf7e81e3-4c82-4842-aef5-7592caaf1d39",
   "metadata": {
    "papermill": {
     "duration": 0.100379,
     "end_time": "2024-11-23T14:29:01.862379",
     "exception": false,
     "start_time": "2024-11-23T14:29:01.762000",
     "status": "completed"
    },
    "tags": []
   },
   "source": [
    "# 读取文档"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "e6920e29-bc7d-4635-be06-d151eaf0e100",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T13:34:32.531232Z",
     "iopub.status.busy": "2024-12-04T13:34:32.531112Z",
     "iopub.status.idle": "2024-12-04T13:34:34.318986Z",
     "shell.execute_reply": "2024-12-04T13:34:34.318540Z",
     "shell.execute_reply.started": "2024-12-04T13:34:32.531220Z"
    },
    "papermill": {
     "duration": 2.012298,
     "end_time": "2024-11-23T14:29:03.974974",
     "exception": false,
     "start_time": "2024-11-23T14:29:01.962676",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "from langchain_community.document_loaders import PyPDFLoader\n",
    "\n",
    "loader = PyPDFLoader(os.path.join(os.path.pardir, 'data', '2024全球经济金融展望报告.pdf'))\n",
    "\n",
    "pdf_documents = loader.load()\n",
    "markdown_documents = open(os.path.join(os.path.pardir, 'outputs', 'MinerU_parsed_20241204', '2024全球经济金融展望报告.md')).read()\n",
    "\n",
    "qa_df = pd.read_excel(os.path.join(preprocess_output_dir, 'question_answer.xlsx'))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "841ec659-4ad7-4e1f-b1ea-3477bf97fde3",
   "metadata": {
    "papermill": {
     "duration": 0.100297,
     "end_time": "2024-11-23T14:29:04.219302",
     "exception": false,
     "start_time": "2024-11-23T14:29:04.119005",
     "status": "completed"
    },
    "tags": []
   },
   "source": [
    "# 文档切分"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "74fe856a-7c19-4c3c-bb30-7abfa6298f74",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T13:34:34.319656Z",
     "iopub.status.busy": "2024-12-04T13:34:34.319491Z",
     "iopub.status.idle": "2024-12-04T13:34:34.327193Z",
     "shell.execute_reply": "2024-12-04T13:34:34.326761Z",
     "shell.execute_reply.started": "2024-12-04T13:34:34.319643Z"
    },
    "papermill": {
     "duration": 0.109229,
     "end_time": "2024-11-23T14:29:04.429069",
     "exception": false,
     "start_time": "2024-11-23T14:29:04.319840",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "import os\n",
    "import pickle\n",
    "from langchain.text_splitter import MarkdownHeaderTextSplitter, RecursiveCharacterTextSplitter\n",
    "from uuid import uuid4\n",
    "\n",
    "def split_pdf_docs(documents, filepath, chunk_size=400, chunk_overlap=40, seperators=['\\n\\n\\n', '\\n\\n'], force_split=False):\n",
    "    if os.path.exists(filepath) and not force_split:\n",
    "        print('found cache, restoring...')\n",
    "        return pickle.load(open(filepath, 'rb'))\n",
    "\n",
    "    splitter = RecursiveCharacterTextSplitter(\n",
    "        chunk_size=chunk_size,\n",
    "        chunk_overlap=chunk_overlap,\n",
    "        separators=seperators\n",
    "    )\n",
    "    split_docs = splitter.split_documents(documents)\n",
    "    for chunk in split_docs:\n",
    "        chunk.metadata['uuid'] = str(uuid4())\n",
    "\n",
    "    pickle.dump(split_docs, open(filepath, 'wb'))\n",
    "\n",
    "    return split_docs\n",
    "\n",
    "def split_md_docs(markdown_document):\n",
    "    headers_to_split_on = [\n",
    "        (\"#\", \"Header 1\"),\n",
    "        (\"##\", \"Header 2\"),\n",
    "        (\"###\", \"Header 3\"),\n",
    "    ]\n",
    "    markdown_splitter = MarkdownHeaderTextSplitter(headers_to_split_on)\n",
    "    md_header_splits = markdown_splitter.split_text(markdown_document)\n",
    "\n",
    "    return md_header_splits"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "aa25540d-0504-4ae7-9804-9e3862b132d5",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T13:34:34.328540Z",
     "iopub.status.busy": "2024-12-04T13:34:34.328415Z",
     "iopub.status.idle": "2024-12-04T13:34:34.340335Z",
     "shell.execute_reply": "2024-12-04T13:34:34.339905Z",
     "shell.execute_reply.started": "2024-12-04T13:34:34.328528Z"
    },
    "papermill": {
     "duration": 0.145583,
     "end_time": "2024-11-23T14:29:04.677429",
     "exception": false,
     "start_time": "2024-11-23T14:29:04.531846",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "found cache, restoring...\n"
     ]
    }
   ],
   "source": [
    "pdf_splitted_docs = split_pdf_docs(pdf_documents, os.path.join(preprocess_output_dir, 'split_docs.pkl'), chunk_size=500, chunk_overlap=50)\n",
    "md_splitted_docs = split_md_docs(markdown_documents)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "c015e2ab-c5f6-4621-ba2a-9c7f26d887ae",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T13:34:34.340853Z",
     "iopub.status.busy": "2024-12-04T13:34:34.340736Z",
     "iopub.status.idle": "2024-12-04T13:34:34.345222Z",
     "shell.execute_reply": "2024-12-04T13:34:34.344877Z",
     "shell.execute_reply.started": "2024-12-04T13:34:34.340842Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[Document(page_content='研究院\\n全球经济金融展望报告\\n要点2024年年报（总第57期） 报告日期：2023年12月12日\\n●2023年全球经济增长动力持续回落，各国复苏分化，\\n发达经济体增速明显放缓，新兴经济体整体表现稳定。\\n全球贸易增长乏力，各国生产景气度逐渐回落，内需\\n对经济的拉动作用减弱。欧美央行货币政策紧缩态势\\n放缓，美元指数高位震荡后走弱，全球股市表现总体\\n好于预期，但区域分化明显。高利率环境抑制债券融\\n资需求，债券违约风险持续上升。\\n●展望2024年，预计全球经济复苏将依旧疲软，主要\\n经济体增长态势和货币政策走势将进一步分化。欧美\\n央行大概率结束本轮紧缩货币周期，美元指数将逐步\\n走弱，流向新兴经济体的跨境资本将增加。国际原油\\n市场短缺格局或延续，新能源发展成为重点。\\n●海湾六国经济发展与投资前景、高利率和高债务对\\n美国房地产市场脆弱性的影响等热点问题值得关注。中国银行研究院\\n全球经济金融研究课题组\\n组长：陈卫东\\n副组长：钟红\\n廖淑萍\\n成员：边卫红\\n熊启跃\\n王有鑫\\n曹鸿宇\\n李颖婷\\n王宁远\\n初晓\\n章凯莉\\n黄小军（纽约）\\n陆晓明（纽约）\\n黄承煜（纽约）\\n宋达志（伦敦）\\n李振龙（伦敦）\\n张传捷（伦敦）\\n刘冰彦（法兰克福）\\n温颍坤（法兰克福）\\n张明捷（法兰克福）\\n王哲（东京）\\n李彧（香港）\\n黎永康（香港）\\n联系人：王有鑫\\n电话：010-66594127\\n邮件：wangyouxin_hq@bank-of-china.com主要经济体GDP增速变化趋势（%）\\n资料来源：IMF，中国银行研究院', metadata={'source': 'data/2024全球经济金融展望报告.pdf', 'page': 0, 'uuid': 'e73a0c9d-d42b-4350-a4c3-b38bf67c68a5'}),\n",
       " Document(page_content='全球经济金融展望报告\\n中国银行研究院 1 2024年\\n全球经济复苏疲软，货币政策取向分化\\n——中国银行全球经济金融展望报告（2024年）\\n2023年，全球经济增长动力持续回落。分区域看，各国复苏存在较大差异，\\n发达经济体增速明显放缓，新兴经济体增速与2022年大致持平。生产端，全球\\n供应链持续恢复，但生产景气度逐渐回落。需求端，内需对经济的拉动作用逐\\n渐减弱，各国国内投资和跨境投资均持续承压；全球货物贸易量指数和价格指\\n数下行，主要经济体出口贸易同比增速下降。欧美央行货币政策延续收紧态势，\\n但步伐整体放缓；金融体系短期资金运行发生结构性变化，“去存款化”特征\\n突出。美元指数高位震荡后走弱，全球股市表现总体好于预期，但区域分化显\\n著。高利率环境抑制债券融资需求，债券违约风险持续上升，美国政府债务可\\n持续性问题引发市场关注。展望2024年，预计全球经济复苏将依旧疲软，主要\\n经济体增长态势和货币政策将进一步分化。欧美央行大概率结束本轮加息周期，\\n日本央行可能退出负利率政策，跨境资本回流美国趋势将放缓，流向新兴经济\\n体的资金将增加。美元指数将逐步走弱，新兴经济体货币汇率有望回升。国际\\n原油市场短缺格局或延续，新能源发展成为重点。本期报告分别对海湾六国经\\n济发展与投资前景、高利率和高债务对美国房地产市场脆弱性的影响两个专题\\n展开分析。\\n一、全球经济回顾与展望\\n（一）全球经济将在波动分化中筑底复苏\\n2023年，全球经济增长动力持续回落，经济增速连续两年下降。受地缘政\\n治冲突、高通胀、货币政策紧缩等因素影响，全球经济下行压力加大。预计2023\\n年全球GDP增速为2.7%（市场汇率法），较2022年下降0.3个百分点。', metadata={'source': 'data/2024全球经济金融展望报告.pdf', 'page': 2, 'uuid': '41d95288-441d-4c02-948a-6a3f0f4ef3ba'}),\n",
       " Document(page_content='全球经济金融展望报告\\n中国银行研究院 2 2024年\\n图1：全球GDP增速（%）\\n资料来源：IMF，中国银行研究院\\n分区域看，全球经济复苏不均衡，各国存在较大差异。发达经济体增速明\\n显放缓，预计2023年增速较2022年下降1个百分点。其中，欧元区和英国经\\n济增速大幅下降，美国表现好于其他发达经济体。2023年三季度，欧元区和英\\n国GDP环比增速均由之前的正增长转为负增长，分别下降0.1%和0.03%；美\\n国GDP环比增长折年率为4.9%，比二季度增速高2.8个百分点。新兴经济体增\\n速与2022年大致持平，预计2023年增速比2022年下降0.1个百分点。其中，\\n东南亚等出口型经济体增长承压，拉美、非洲等大宗商品出口国增速放缓，中\\n东欧国家经济增速加快（图2）。', metadata={'source': 'data/2024全球经济金融展望报告.pdf', 'page': 3, 'uuid': '1f406690-b478-43cd-96f8-cd77924e300e'})]"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pdf_splitted_docs[:3]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "543f6f4e-28c1-4238-ae99-9abab95c2318",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T13:34:34.345838Z",
     "iopub.status.busy": "2024-12-04T13:34:34.345719Z",
     "iopub.status.idle": "2024-12-04T13:34:34.351484Z",
     "shell.execute_reply": "2024-12-04T13:34:34.351089Z",
     "shell.execute_reply.started": "2024-12-04T13:34:34.345826Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[Document(metadata={'Header 1': '全球经济金融展望报告'}, page_content='2024年年报（总第57期）  \\n报告日期：2023年12月12日'),\n",
       " Document(metadata={'Header 1': '要点'}, page_content='●2023 年全球经济增长动力持续回落，各国复苏分化，发达经济体增速明显放缓，新兴经济体整体表现稳定。全球贸易增长乏力，各国生产景气度逐渐回落，内需对经济的拉动作用减弱。欧美央行货币政策紧缩态势放缓，美元指数高位震荡后走弱，全球股市表现总体好于预期，但区域分化明显。高利率环境抑制债券融资需求，债券违约风险持续上升。  \\n$\\\\bullet$ 展望2024 年，预计全球经济复苏将依旧疲软，主要经济体增长态势和货币政策走势将进一步分化。欧美央行大概率结束本轮紧缩货币周期，美元指数将逐步走弱，流向新兴经济体的跨境资本将增加。国际原油市场短缺格局或延续，新能源发展成为重点。  \\n$\\\\bullet$ 海湾六国经济发展与投资前景、高利率和高债务对美国房地产市场脆弱性的影响等热点问题值得关注。  \\n![](images/c7e6ce1606712e84e07a05bcf6016906efa3fc778e40fcd0e91ac4fcb5503b79.jpg)\\n主要经济体GDP 增速变化趋势（%）\\n资料来源：IMF，中国银行研究院'),\n",
       " Document(metadata={'Header 1': '中国银行研究院全球经济金融研究课题组'}, page_content='![](images/a5d0eb181c75231451c8f890ec50fe5822e2306a9beb543ca35a04880abbf639.jpg)  \\n联系人：王有鑫\\n电话：010-66594127\\n邮件： wangyouxin_hq@bank-of-china.com')]"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "md_splitted_docs[:3]"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "4fb9cf39-1221-4b46-ab92-b300dc261c8e",
   "metadata": {},
   "source": [
    "## 检查一下切分后的块长度分布"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "c76b31aa-28af-430b-a62c-8879905176b7",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T13:34:34.352059Z",
     "iopub.status.busy": "2024-12-04T13:34:34.351939Z",
     "iopub.status.idle": "2024-12-04T13:34:34.359383Z",
     "shell.execute_reply": "2024-12-04T13:34:34.358874Z",
     "shell.execute_reply.started": "2024-12-04T13:34:34.352047Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "count      52.000000\n",
       "mean      623.307692\n",
       "std       258.763920\n",
       "min        65.000000\n",
       "25%       476.750000\n",
       "50%       618.000000\n",
       "75%       801.250000\n",
       "max      1306.000000\n",
       "dtype: float64"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.Series([len(d.page_content) for d in pdf_splitted_docs]).describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "91e17fe4-4ef8-4768-932e-ed9cfb76eef6",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T13:34:34.360189Z",
     "iopub.status.busy": "2024-12-04T13:34:34.359867Z",
     "iopub.status.idle": "2024-12-04T13:34:34.366816Z",
     "shell.execute_reply": "2024-12-04T13:34:34.366429Z",
     "shell.execute_reply.started": "2024-12-04T13:34:34.360163Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "count      43.000000\n",
       "mean      749.395349\n",
       "std       673.945036\n",
       "min        33.000000\n",
       "25%       241.000000\n",
       "50%       462.000000\n",
       "75%      1075.500000\n",
       "max      2839.000000\n",
       "dtype: float64"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.Series([len(d.page_content) for d in md_splitted_docs]).describe()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "b7872f43-d308-4eed-9dc0-9ef73cd96ba9",
   "metadata": {},
   "source": [
    "## 检查超长块"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "a0370f48-6a02-4aac-a841-5a911182a4af",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T13:34:34.367456Z",
     "iopub.status.busy": "2024-12-04T13:34:34.367334Z",
     "iopub.status.idle": "2024-12-04T13:34:34.373187Z",
     "shell.execute_reply": "2024-12-04T13:34:34.372764Z",
     "shell.execute_reply.started": "2024-12-04T13:34:34.367444Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "page_content='2023年，全球经济增长动力持续回落，经济增速连续两年下降。受地缘政治冲突、高通胀、货币政策紧缩等因素影响，全球经济下行压力加大。预计2023年全球GDP增速为 $2.7\\%$ （市场汇率法），较2022年下降0.3个百分点。  \n",
      "![](images/7600acb45b91442f8127f20629c791d91f04827835929cb12612c409fde82574.jpg)\n",
      "图1：全球GDP增速 $(\\%)$ ）  \n",
      "资料来源：IMF，中国银行研究院  \n",
      "分区域看，全球经济复苏不均衡，各国存在较大差异。发达经济体增速明显放缓，预计2023年增速较2022年下降1个百分点。其中，欧元区和英国经济增速大幅下降，美国表现好于其他发达经济体。2023年三季度，欧元区和英国GDP环比增速均由之前的正增长转为负增长，分别下降 $0.1\\%$ 和 $0.03\\%$ ；美国GDP环比增长折年率为 $4.9\\%$ ，比二季度增速高2.8个百分点。新兴经济体增速与2022年大致持平，预计2023年增速比2022年下降0.1个百分点。其中，东南亚等出口型经济体增长承压，拉美、非洲等大宗商品出口国增速放缓，中东欧国家经济增速加快（图2）。  \n",
      "![](images/abf30ccab508a0c4733d58e3810cda53dabdaeb4239acf37e57a931a0296d80c.jpg)\n",
      "图2：主要经济体GDP增速变化趋势（%）\n",
      "注：东盟五国包含印度尼西亚、马来西亚、菲律宾、新加坡和泰国。  \n",
      "资料来源：IMF，中国银行研究院  \n",
      "从生产端看，全球供应链持续恢复，但生产景气度逐渐回落。截至2023年10月底，纽约联储全球供应链压力指数降至有记录以来的最低值。荷兰经济分析局数据显示，全球工业生产量于4月触及年内低位，5-8月逐月回升，但发达经济体和新兴经济体分化明显（图3）。其中，主要新兴经济体工业生产指数普遍走高，如俄罗斯、土耳其、南非等，而发达经济体中的美国和韩国回升，英国、德国、意大利下行，日本波动较大，整体趋于平稳。全球融资环境收紧和经济下行压力对工业生产前景带来较大影响，全球制造业PMI指数明显回落，从2月的 $49.9\\%$ 降至10月的 $48.8\\%$ 。  \n",
      "![](images/7d2b17776c10d8fc38a113a20b40791a9e65da33b4209516d0bde88163bee3ea.jpg)\n",
      "图3：部分经济体工业生产指数变化趋势（2010年 $\\mathbf{-100}\\rangle$ ）\n",
      "资料来源：荷兰经济分析局，中国银行研究院  \n",
      "从需求端看，内需是支撑发达经济体增长的主要动力，但对经济的拉动作用逐渐减弱。美国消费未受加息明显影响，私人消费维持稳定增长，前三季度对美国经济增长的贡献率高达 $64.4\\%$ ；8-9月，美国零售和食品销售额连续两个月环比增速保持在 $0.7\\%$ 以上，高于市场预期，但10月增速大幅回落至 $-0.1\\%$ 。欧洲各国消费指数整体维持稳定（图4），是上半年免于陷入衰退的主要动力。但随着高利率和高通胀持续，对消费的影响逐渐释放，内需增长动力逐渐弱化，全球服务业PMI指数从二季度开始明显回落，从5月的 $55.5\\%$ 降至10月的 $50.4\\%$ 连续5个月下行；OECD消费者信心指数从7月开始连续3个月回落。  \n",
      "![](images/3876098e7c8b21ca208f46cd2b25aa420574a706ae2648c774fcf130fac892db.jpg)\n",
      "图4：部分欧洲国家零售销售指数\n",
      "注：除英国是以2019年为基年外，其他经济体均为2015年为基年。资料来源：Wind，中国银行研究院  \n",
      "发达经济体投资受加息政策影响较大，国内投资和跨境投资均持续承压。美国私人投资在2023年一季度触底后逐渐反弹，三季度存货及住宅投资恢复增长，带动私人投资增速提升至 $8.4\\%$ （经季调后环比折年率），但制造业和设备投资均放缓，环比增长折年率分别降低 $0.1\\%$ 和 $3.8\\%$ 。欧盟投资增速放缓，房地产投资减少。2023年二季度，欧元区固定资本形成总额环比增长 $0.1\\%$ ，比一季度增速下降0.3个百分点，房地产对GDP环比增长拉动率转为负值。在紧缩货币政策影响下，发达经济体企业部门宏观杠杆率下降，企业加杠杆或负债投资意愿不足。同2022年底相比，2023年二季度，美国、英国、法国、意大利和德国非金融企业部门负债率分别下降了2.4个、3.4个、4.0个、3.0个和1.3个百分点（图5）。IMF预测2023年全球投资率（投资占GDP的比重）将下降1.0个百分点至 $26.4\\%$ （图6），其中，欧盟将下降1.1个百分点，比发达经济体平均降幅高0.2个百分点。从跨境投资角度看，受地缘政治局势紧张、金融领域动荡加剧、高利率和投资审查趋严等影响，并购交易仍然疲软，而在全球产业链重塑背景下，东南亚等区域绿地投资恢复增长。联合国贸发会议预计2023年全球跨境直接投资将继续下行，但降幅较2022年收窄。  \n",
      "![](images/876898312b7f8b55b06bc9b09f7a585aebeb663d67fe281dfb4ed939588a8d6e.jpg)\n",
      "图5：部分发达国家非金融企业部门债务率（%）\n",
      "资料来源：IIF，中国银行研究院\n",
      "图6：全球投资率变化趋势 $(\\,\\%)$ ）  \n",
      "![](images/5c049cdfb254bf5b7720e25c09f7e2e434c5c77b74897dfb28ee5154f4ef318e.jpg)\n",
      "资料来源：IMF，中国银行研究院  \n",
      "从国际贸易角度看，全球货物贸易量和价格指数均承压下行，主要经济体出口贸易同比增速下降。荷兰经济分析局数据显示，2023年1-8月，全球货物贸易量指数和价格指数均震荡下行，8月数值比1月分别下降0.9个和4.3个点（图7）。10月，世贸组织将2023年全球货物贸易增速预测值下调0.9个百分点至 $0.8\\%$ ，2023年国际贸易增长或为近几年最低水平。但近期东亚、东南亚等主要经济体出口下行趋势收窄，贸易呈现企稳迹象。9-10月，越南出口结束连续10个月的负增长态势，同比分别增长 $5.0\\%$ 和 $6.7\\%$ 。10月，韩国出口同比增长 $5.1\\%$ ，是自2022年10月以来首次正增长。  \n",
      "![](images/5826ae44f43ef12c95089d898a8b9375a7e989c7ba7a6de6388cbdd174b65516.jpg)\n",
      "图7：全球货物贸易量指数和货物贸易价格指数（2010年 $\\mathbf{-100}.$ ）\n",
      "资料来源：荷兰经济分析局，中国银行研究院' metadata={'Header 1': '（一）全球经济将在波动分化中筑底复苏'}\n"
     ]
    }
   ],
   "source": [
    "for d in md_splitted_docs:\n",
    "    if len(d.page_content) > 2000:\n",
    "        print(d)\n",
    "        break"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "220dbc3a-fceb-4e49-a3f1-01e16660b2a6",
   "metadata": {
    "papermill": {
     "duration": 0.100209,
     "end_time": "2024-11-23T14:29:05.255871",
     "exception": false,
     "start_time": "2024-11-23T14:29:05.155662",
     "status": "completed"
    },
    "tags": []
   },
   "source": [
    "# 检索"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "8598a11c-25d8-4af1-a98b-06a8c394e261",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T13:34:34.373975Z",
     "iopub.status.busy": "2024-12-04T13:34:34.373671Z",
     "iopub.status.idle": "2024-12-04T13:34:35.207079Z",
     "shell.execute_reply": "2024-12-04T13:34:35.206622Z",
     "shell.execute_reply.started": "2024-12-04T13:34:34.373947Z"
    },
    "papermill": {
     "duration": 0.989203,
     "end_time": "2024-11-23T14:29:06.345534",
     "exception": false,
     "start_time": "2024-11-23T14:29:05.356331",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "device: cuda\n"
     ]
    }
   ],
   "source": [
    "from langchain.embeddings import HuggingFaceBgeEmbeddings\n",
    "from langchain_community.vectorstores import Chroma\n",
    "import torch\n",
    "\n",
    "device = 'cuda' if torch.cuda.is_available() else 'cpu'\n",
    "print(f'device: {device}')\n",
    "\n",
    "def get_embeddings(model_path):\n",
    "    embeddings = HuggingFaceBgeEmbeddings(\n",
    "        model_name=model_path,\n",
    "        model_kwargs={'device': device},\n",
    "        encode_kwargs={'normalize_embeddings': True},\n",
    "        # show_progress=True\n",
    "        query_instruction='为这个句子生成表示以用于检索相关文章：'\n",
    "    )\n",
    "    return embeddings"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "f6f46c73-7369-448f-a89a-ed3d817cad47",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T13:34:35.207973Z",
     "iopub.status.busy": "2024-12-04T13:34:35.207583Z",
     "iopub.status.idle": "2024-12-04T13:34:38.110221Z",
     "shell.execute_reply": "2024-12-04T13:34:38.109725Z",
     "shell.execute_reply.started": "2024-12-04T13:34:35.207960Z"
    },
    "papermill": {
     "duration": 83.983138,
     "end_time": "2024-11-23T14:35:06.117207",
     "exception": false,
     "start_time": "2024-11-23T14:33:42.134069",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "import shutil\n",
    "\n",
    "from tqdm.auto import tqdm\n",
    "from langchain_community.vectorstores import Chroma\n",
    "\n",
    "model_path = 'BAAI/bge-large-zh-v1.5'\n",
    "embeddings = get_embeddings(model_path)\n",
    "\n",
    "def get_vector_db(splitted_docs, embeddings, name):\n",
    "    persist_directory = os.path.join(expr_dir, 'chroma', 'bge', name)\n",
    "    shutil.rmtree(persist_directory, ignore_errors=True)\n",
    "    vector_db = Chroma.from_documents(\n",
    "        splitted_docs,\n",
    "        embedding=embeddings,\n",
    "        persist_directory=persist_directory\n",
    "    )\n",
    "    return vector_db"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "3318f9bb-a7f8-4c44-bf8d-302b71dca44c",
   "metadata": {},
   "source": [
    "使用新的切分方式，每个切片的UUID跟原始切片不一致了，检索的Ground Truth丢失了，此处通过向量检索的方式，将原始的UUID复制到Markdown的切片上，方便后续排查检索问题"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "b48362cc-5776-4f1c-8feb-64b1a4a675e8",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T13:34:38.110873Z",
     "iopub.status.busy": "2024-12-04T13:34:38.110629Z",
     "iopub.status.idle": "2024-12-04T13:34:55.349872Z",
     "shell.execute_reply": "2024-12-04T13:34:55.349302Z",
     "shell.execute_reply.started": "2024-12-04T13:34:38.110861Z"
    }
   },
   "outputs": [],
   "source": [
    "pdf_vector_db = get_vector_db(pdf_splitted_docs, embeddings, 'pdf')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "dabf2d44-5afa-41f4-bd6c-1cbaaf00e571",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T13:34:55.350564Z",
     "iopub.status.busy": "2024-12-04T13:34:55.350332Z",
     "iopub.status.idle": "2024-12-04T13:34:57.844555Z",
     "shell.execute_reply": "2024-12-04T13:34:57.844071Z",
     "shell.execute_reply.started": "2024-12-04T13:34:55.350550Z"
    }
   },
   "outputs": [],
   "source": [
    "# 以新切片作为query，查询旧切片中最相似的那个，将它的UUID复制到新切片中\n",
    "for doc in md_splitted_docs:\n",
    "    query = doc.page_content\n",
    "    # 只检索最相似的那个\n",
    "    chunk_score_pair = pdf_vector_db.similarity_search_with_relevance_scores(query, k=1)[0]\n",
    "    doc.metadata['uuid'] = chunk_score_pair[0].metadata['uuid']\n",
    "    doc.metadata['pdf_chunk_sim'] = chunk_score_pair[1]"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "5a655299-15f5-44b1-925d-5137a1e1c881",
   "metadata": {},
   "source": [
    "chunk_score_pair的结构如下"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "46817374-ceb2-486a-a7f2-240c2abc98f0",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T13:34:57.845163Z",
     "iopub.status.busy": "2024-12-04T13:34:57.845035Z",
     "iopub.status.idle": "2024-12-04T13:34:57.848083Z",
     "shell.execute_reply": "2024-12-04T13:34:57.847773Z",
     "shell.execute_reply.started": "2024-12-04T13:34:57.845151Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(Document(metadata={'page': 51, 'source': 'data/2024全球经济金融展望报告.pdf', 'uuid': 'ebf0d999-59f6-4fd3-941e-05a7a60c255a'}, page_content='免责声明\\n本研究报告由中国银行研究院撰写，研究报告中所引用信息均来自公开资料。\\n本研究报告中包含的观点或估计仅代表作者迄今为止的判断，它们不一定反映中国银行的观点。中国\\n银行研究院可以不经通知加以改变，且没有对此报告更新、修正或修改的责任。\\n本研究报告内容及观点仅供参考，不构成任何投资建议。对于本报告所提供信息所导致的任何直接的\\n或者间接的投资盈亏后果不承担任何责任。\\n本研究报告版权仅为中国银行研究院所有，未经书面许可，任何机构和个人不得以任何形式翻版、复\\n制和发布。如引用发布，需注明出处为中国银行研究院，且不得对本报告进行有悖原意的引用、删节和修\\n改。中国银行研究院保留对任何侵权行为和有悖报告原意的引用行为进行追究的权利。'),\n",
       " 0.8547903630738032)"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "chunk_score_pair"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "62560d74-7d90-4e69-ae43-162b248e1622",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T13:34:57.848710Z",
     "iopub.status.busy": "2024-12-04T13:34:57.848559Z",
     "iopub.status.idle": "2024-12-04T13:35:14.844796Z",
     "shell.execute_reply": "2024-12-04T13:35:14.842419Z",
     "shell.execute_reply.started": "2024-12-04T13:34:57.848698Z"
    }
   },
   "outputs": [],
   "source": [
    "md_vector_db = get_vector_db(md_splitted_docs, embeddings, 'md')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "55d51ebc-b29d-45be-b8c7-1d5610b270b8",
   "metadata": {},
   "source": [
    "# 计算检索准确率"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "ad8ef473-7ad8-43d4-8b9a-9890cf3bf4c6",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T13:35:14.848427Z",
     "iopub.status.busy": "2024-12-04T13:35:14.847681Z",
     "iopub.status.idle": "2024-12-04T13:35:14.861501Z",
     "shell.execute_reply": "2024-12-04T13:35:14.859207Z",
     "shell.execute_reply.started": "2024-12-04T13:35:14.848359Z"
    }
   },
   "outputs": [],
   "source": [
    "test_df = qa_df[(qa_df['dataset'] == 'test') & (qa_df['qa_type'] == 'detailed')]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "070b78ef-3140-4e59-886c-09c5184a8ee9",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T13:35:14.865399Z",
     "iopub.status.busy": "2024-12-04T13:35:14.864670Z",
     "iopub.status.idle": "2024-12-04T13:35:14.878878Z",
     "shell.execute_reply": "2024-12-04T13:35:14.876613Z",
     "shell.execute_reply.started": "2024-12-04T13:35:14.865330Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "93"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(test_df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "id": "435148a0-b2b1-49fb-8eea-2ad117c0b9d4",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T13:35:14.881679Z",
     "iopub.status.busy": "2024-12-04T13:35:14.880987Z",
     "iopub.status.idle": "2024-12-04T13:35:14.897352Z",
     "shell.execute_reply": "2024-12-04T13:35:14.895113Z",
     "shell.execute_reply.started": "2024-12-04T13:35:14.881613Z"
    }
   },
   "outputs": [],
   "source": [
    "def get_hit_stat_df(vector_db, top_k_arr=list(range(1, 9))):\n",
    "    hit_stat_data = []\n",
    "\n",
    "    for k in tqdm(top_k_arr):\n",
    "        for idx, row in test_df.iterrows():\n",
    "            question = row['question']\n",
    "            true_uuid = row['uuid']\n",
    "            # chunks = retrieve_fn(question, k=k)\n",
    "            chunks = vector_db.similarity_search(question, k=k)\n",
    "            retrieved_uuids = [doc.metadata['uuid'] for doc in chunks]\n",
    "\n",
    "            hit_stat_data.append({\n",
    "                'question': question,\n",
    "                'top_k': k,\n",
    "                'hit': int(true_uuid in retrieved_uuids),\n",
    "                'retrieved_chunks': len(chunks)\n",
    "            })\n",
    "    hit_stat_df = pd.DataFrame(hit_stat_data)\n",
    "    return hit_stat_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "id": "01e01af2-9f53-462a-bcb1-2864864e6488",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T13:35:14.909765Z",
     "iopub.status.busy": "2024-12-04T13:35:14.909033Z",
     "iopub.status.idle": "2024-12-04T13:35:35.001613Z",
     "shell.execute_reply": "2024-12-04T13:35:35.001080Z",
     "shell.execute_reply.started": "2024-12-04T13:35:14.909697Z"
    }
   },
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "1a4b4ecd66e7404dbac9a29c1cf8e052",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/8 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "hit_stat_df = get_hit_stat_df(md_vector_db)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "id": "de0c3de0-92b5-4804-a374-108984640cf8",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T13:35:35.002247Z",
     "iopub.status.busy": "2024-12-04T13:35:35.002105Z",
     "iopub.status.idle": "2024-12-04T13:35:35.009053Z",
     "shell.execute_reply": "2024-12-04T13:35:35.008709Z",
     "shell.execute_reply.started": "2024-12-04T13:35:35.002234Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>top_k</th>\n",
       "      <th>hit_rate</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>0.225806</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>0.311828</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>0.344086</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>0.419355</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>0.440860</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>6</td>\n",
       "      <td>0.440860</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>7</td>\n",
       "      <td>0.440860</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>8</td>\n",
       "      <td>0.451613</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   top_k  hit_rate\n",
       "0      1  0.225806\n",
       "1      2  0.311828\n",
       "2      3  0.344086\n",
       "3      4  0.419355\n",
       "4      5  0.440860\n",
       "5      6  0.440860\n",
       "6      7  0.440860\n",
       "7      8  0.451613"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "hit_stat_df.groupby(['top_k'])['hit'].mean().reset_index().rename(columns={'hit': 'hit_rate'})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "id": "ccc0dca9-8ad6-4d0c-a6e1-8279babbdfbf",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T13:35:35.009601Z",
     "iopub.status.busy": "2024-12-04T13:35:35.009476Z",
     "iopub.status.idle": "2024-12-04T13:35:35.364471Z",
     "shell.execute_reply": "2024-12-04T13:35:35.364014Z",
     "shell.execute_reply.started": "2024-12-04T13:35:35.009589Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<Axes: xlabel='top_k', ylabel='hit'>"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAjcAAAGxCAYAAACeKZf2AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/TGe4hAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAgFklEQVR4nO3df1SUdf738deAAZqImYKKKKltqKWUiIuu6RblWseN/dFSx01C1z33SUp37rrVLMjSsNZIT7mS5o/uyrTd7dfuGv2YOyyLwlBLt7Qfm2EaoGc3UbJhm5n7j05TfEUDZeYa3jwf58w5XhfXxbw/ecync13MuAKBQEAAAABGRDk9AAAAQFsibgAAgCnEDQAAMIW4AQAAphA3AADAFOIGAACYQtwAAABTiBsAAGBKJ6cHCDe/368DBw4oPj5eLpfL6XEAAEALBAIBHTlyRH379lVU1Mlfm+lwcXPgwAGlpKQ4PQYAADgF+/btU79+/U56TIeLm/j4eEnf/Mfp1q2bw9MAAICWqK+vV0pKSvDv8ZPpcHHz7aWobt26ETcAALQzLbmlhBuKAQCAKcQNAAAwhbgBAACmEDcAAMAU4gYAAJhC3AAAAFOIGwAAYApxAwAATCFuAACAKcQNAAAwhbgBAACmEDcAAMAU4gYAAJhC3AAAAFOIGwAAYEonpwcAAABt44477nB6hDZxuuvglRsAAGAKcQMAAEwhbgAAgCnEDQAAMIW4AQAAphA3AADAFOIGAACYwvvcAOgQHvzff3N6hDZRcN/kVh2/6Le/DtEk4TX/sb+06vj3F/2/EE0SXkPmX+L0CO0Sr9wAAABTiBsAAGAKcQMAAEwhbgAAgCnEDQAAMIW4AQAAphA3AADAFOIGAACYQtwAAABTiBsAAGAKcQMAAEwhbgAAgCnEDQAAMIW4AQAAphA3AADAFOIGAACY0snpAQCE1+aLxzs9QpsY/+pmp0cAEKF45QYAAJhC3AAAAFOIGwAAYApxAwAATCFuAACAKcQNAAAwhbgBAACmEDcAAMAU4gYAAJgSEXGzfPlypaamKi4uTqNHj1ZlZWWLztuwYYNcLpdycnJCOyAAAGg3HI+bjRs3yu12q6ioSNu2bdOIESM0ceJE1dXVnfS8vXv36uabb9a4cePCNCkAAGgPHI+bkpISzZgxQ/n5+Ro6dKhKS0vVpUsXrVmz5oTn+Hw+TZkyRQsWLNDAgQPDOC0AAIh0jsZNY2OjqqqqlJ2dHdwXFRWl7OxsVVRUnPC8O++8U4mJiZo+fXo4xgQAAO2Io58KfujQIfl8PiUlJTXZn5SUpN27dzd7zpYtW7R69Wrt2LGjRc/h9Xrl9XqD2/X19ac8LwAAiHyOX5ZqjSNHjui6667TqlWr1LNnzxadU1xcrISEhOAjJSUlxFMCAAAnOfrKTc+ePRUdHa3a2tom+2tra9W7d+/jjv/444+1d+9eTZ48ObjP7/dLkjp16qQ9e/Zo0KBBTc6ZN2+e3G53cLu+vp7AAQDAMEfjJiYmRiNHjpTH4wn+OLff75fH41FBQcFxx6elpWnnzp1N9t122206cuSIli1b1my0xMbGKjY2NiTzAwCAyONo3EiS2+1WXl6eMjIylJmZqaVLl6qhoUH5+fmSpKlTpyo5OVnFxcWKi4vT+eef3+T87t27S9Jx+wEAQMfkeNzk5ubq4MGDKiwsVE1NjdLT01VWVha8ybi6ulpRUe3q1iAAAOAgx+NGkgoKCpq9DCVJ5eXlJz133bp1bT8QAABot3hJBAAAmELcAAAAU4gbAABgCnEDAABMIW4AAIApxA0AADCFuAEAAKYQNwAAwBTiBgAAmELcAAAAU4gbAABgCnEDAABMIW4AAIApxA0AADCFuAEAAKYQNwAAwBTiBgAAmELcAAAAU4gbAABgCnEDAABMIW4AAIApxA0AADCFuAEAAKYQNwAAwBTiBgAAmELcAAAAU4gbAABgCnEDAABM
IW4AAIApxA0AADCFuAEAAKYQNwAAwBTiBgAAmELcAAAAU4gbAABgCnEDAABMIW4AAIApnZweAHDK2AfGOj1Cm3j9xtedHgEAIgqv3AAAAFOIGwAAYApxAwAATCFuAACAKcQNAAAwhbgBAACmEDcAAMAU4gYAAJhC3AAAAFOIGwAAYApxAwAATCFuAACAKcQNAAAwhbgBAACmEDcAAMAU4gYAAJhC3AAAAFOIGwAAYApxAwAATCFuAACAKcQNAAAwhbgBAACmEDcAAMAU4gYAAJhC3AAAAFM6OT0AnFd95wVOj9Am+hfudHoEAEAE4JUbAABgCnEDAABMIW4AAIApERE3y5cvV2pqquLi4jR69GhVVlae8NinnnpKGRkZ6t69u84880ylp6fr0UcfDeO0AAAgkjkeNxs3bpTb7VZRUZG2bdumESNGaOLEiaqrq2v2+B49emj+/PmqqKjQu+++q/z8fOXn5+uFF14I8+QAACASOR43JSUlmjFjhvLz8zV06FCVlpaqS5cuWrNmTbPHT5gwQb/4xS80ZMgQDRo0SLNmzdLw4cO1ZcuWME8OAAAikaNx09jYqKqqKmVnZwf3RUVFKTs7WxUVFT94fiAQkMfj0Z49e3TxxReHclQAANBOOPo+N4cOHZLP51NSUlKT/UlJSdq9e/cJzzt8+LCSk5Pl9XoVHR2tP/3pT7rsssuaPdbr9crr9Qa36+vr22Z4AAAQkdrlm/jFx8drx44dOnr0qDwej9xutwYOHKgJEyYcd2xxcbEWLFgQ/iEBAIAjHI2bnj17Kjo6WrW1tU3219bWqnfv3ic8LyoqSoMHD5Ykpaen6/3331dxcXGzcTNv3jy53e7gdn19vVJSUtpmAQAAIOI4es9NTEyMRo4cKY/HE9zn9/vl8XiUlZXV4u/j9/ubXHr6vtjYWHXr1q3JAwAA2OX4ZSm32628vDxlZGQoMzNTS5cuVUNDg/Lz8yVJU6dOVXJysoqLiyV9c5kpIyNDgwYNktfr1aZNm/Too49qxYoVTi4DAABECMfjJjc3VwcPHlRhYaFqamqUnp6usrKy4E3G1dXVior67gWmhoYG3XDDDfrss8/UuXNnpaWl6bHHHlNubq5TSwAAABHE8biRpIKCAhUUFDT7tfLy8ibbCxcu1MKFC8MwFQAAaI8cfxM/AACAtkTcAAAAU4gbAABgCnEDAABMIW4AAIApxA0AADCFuAEAAKYQNwAAwBTiBgAAmELcAAAAU4gbAABgCnEDAABMIW4AAIApxA0AADCFuAEAAKYQNwAAwBTiBgAAmELcAAAAU4gbAABgCnEDAABMIW4AAIApxA0AADCFuAEAAKYQNwAAwBTiBgAAmELcAAAAU4gbAABgCnEDAABMIW4AAIApxA0AADCFuAEAAKYQNwAAwBTiBgAAmELcAAAAU4gbAABgCnEDAABMIW4AAIAppxQ3l1xyib744ovj9tfX1+uSSy453ZkAAABO2SnFTXl5uRobG4/b/9VXX+m111477aEAAABOVafWHPzuu+8Gf/3ee++ppqYmuO3z+VRWVqbk5OS2mw4AAKCVWhU36enpcrlccrlczV5+6ty5sx544IE2Gw4AAKC1WhU3n3zyiQKBgAYOHKjKykr16tUr+LWYmBglJiYqOjq6zYcEAABoqVbFzYABAyRJfr8/JMMAAACcrhbHzXPPPadJkybpjDPO0HPPPXfSY3/+85+f9mAAAACnosVxk5OTo5qaGiUmJionJ+eEx7lcLvl8vraYDQAAoNVaHDffvxTFZSkAABCpWnXPzfd5PB55PB7V1dU1iR2Xy6XVq1e3yXAAAACtdUpxs2DBAt15553KyMhQnz595HK52nouAACAU3JKcVNaWqp169bpuuuua+t5AAAATsspffxCY2OjxowZ09azAAAAnLZTipvf/e53Wr9+fVvPAgAAcNpafFnK7XYHf+33+7Vy5Uq9/PLLGj58uM4444wmx5aUlLTdhAAAAK3Q
4rjZvn17k+309HRJ0q5du5rsb883F4+85f86PUKbqPrjVKdHAADAMS2Om1deeSWUcwAAALSJU7rnBgAAIFIRNwAAwBTiBgAAmELcAAAAU4gbAABgCnEDAABMIW4AAIApxA0AADCFuAEAAKYQNwAAwBTiBgAAmELcAAAAU4gbAABgSkTEzfLly5Wamqq4uDiNHj1alZWVJzx21apVGjdunM466yydddZZys7OPunxAACgY3E8bjZu3Ci3262ioiJt27ZNI0aM0MSJE1VXV9fs8eXl5br22mv1yiuvqKKiQikpKbr88su1f//+ME8OAAAikeNxU1JSohkzZig/P19Dhw5VaWmpunTpojVr1jR7/OOPP64bbrhB6enpSktL08MPPyy/3y+PxxPmyQEAQCRyNG4aGxtVVVWl7Ozs4L6oqChlZ2eroqKiRd/jyy+/1H//+1/16NEjVGMCAIB2pJOTT37o0CH5fD4lJSU12Z+UlKTdu3e36HvMmTNHffv2bRJI3+f1euX1eoPb9fX1pz4wAACIeI5fljodixcv1oYNG/T0008rLi6u2WOKi4uVkJAQfKSkpIR5SgAAEE6Oxk3Pnj0VHR2t2traJvtra2vVu3fvk567ZMkSLV68WC+++KKGDx9+wuPmzZunw4cPBx/79u1rk9kBAEBkcjRuYmJiNHLkyCY3A397c3BWVtYJz7v33nt11113qaysTBkZGSd9jtjYWHXr1q3JAwAA2OXoPTeS5Ha7lZeXp4yMDGVmZmrp0qVqaGhQfn6+JGnq1KlKTk5WcXGxJOmee+5RYWGh1q9fr9TUVNXU1EiSunbtqq5duzq2DgAAEBkcj5vc3FwdPHhQhYWFqqmpUXp6usrKyoI3GVdXVysq6rsXmFasWKHGxkb9+te/bvJ9ioqKdMcdd4RzdAAAEIEcjxtJKigoUEFBQbNfKy8vb7K9d+/e0A8EAADarXb901IAAAD/E3EDAABMIW4AAIApxA0AADCFuAEAAKYQNwAAwBTiBgAAmELcAAAAU4gbAABgCnEDAABMIW4AAIApxA0AADCFuAEAAKYQNwAAwBTiBgAAmELcAAAAU4gbAABgCnEDAABMIW4AAIApxA0AADCFuAEAAKYQNwAAwBTiBgAAmELcAAAAU4gbAABgCnEDAABMIW4AAIApxA0AADCFuAEAAKYQNwAAwBTiBgAAmELcAAAAU4gbAABgCnEDAABMIW4AAIApxA0AADCFuAEAAKYQNwAAwBTiBgAAmELcAAAAU4gbAABgCnEDAABMIW4AAIApxA0AADCFuAEAAKYQNwAAwBTiBgAAmELcAAAAU4gbAABgCnEDAABMIW4AAIApxA0AADCFuAEAAKYQNwAAwBTiBgAAmELcAAAAU4gbAABgCnEDAABMIW4AAIApxA0AADCFuAEAAKYQNwAAwBTiBgAAmELcAAAAU4gbAABgCnEDAABMIW4AAIApjsfN8uXLlZqaqri4OI0ePVqVlZUnPPaf//ynfvWrXyk1NVUul0tLly4N36AAAKBdcDRuNm7cKLfbraKiIm3btk0jRozQxIkTVVdX1+zxX375pQYOHKjFixerd+/eYZ4WAAC0B47GTUlJiWbMmKH8/HwNHTpUpaWl6tKli9asWdPs8aNGjdIf//hHXXPNNYqNjQ3ztAAAoD1wLG4aGxtVVVWl7Ozs74aJilJ2drYqKiqcGgsAALRznZx64kOHDsnn8ykpKanJ/qSkJO3evbvNnsfr9crr9Qa36+vr2+x7AwCAyOP4DcWhVlxcrISEhOAjJSXF6ZEAAEAIORY3PXv2VHR0tGpra5vsr62tbdObhefNm6fDhw8HH/v27Wuz7w0AACKPY3ETExOjkSNHyuPxBPf5/X55PB5lZWW12fPExsaqW7duTR4AAMAux+65kSS32628vDxlZGQoMzNTS5cuVUNDg/Lz8yVJU6dOVXJysoqLiyV9cxPye++9F/z1/v37tWPHDnXt2lWDBw92
bB0AACByOBo3ubm5OnjwoAoLC1VTU6P09HSVlZUFbzKurq5WVNR3Ly4dOHBAF154YXB7yZIlWrJkicaPH6/y8vJwjw8AACKQo3EjSQUFBSooKGj2a/8zWFJTUxUIBMIwFQAAaK/M/7QUAADoWIgbAABgCnEDAABMIW4AAIApxA0AADCFuAEAAKYQNwAAwBTiBgAAmELcAAAAU4gbAABgCnEDAABMIW4AAIApxA0AADCFuAEAAKYQNwAAwBTiBgAAmELcAAAAU4gbAABgCnEDAABMIW4AAIApxA0AADCFuAEAAKYQNwAAwBTiBgAAmELcAAAAU4gbAABgCnEDAABMIW4AAIApxA0AADCFuAEAAKYQNwAAwBTiBgAAmELcAAAAU4gbAABgCnEDAABMIW4AAIApxA0AADCFuAEAAKYQNwAAwBTiBgAAmELcAAAAU4gbAABgCnEDAABMIW4AAIApxA0AADCFuAEAAKYQNwAAwBTiBgAAmELcAAAAU4gbAABgCnEDAABMIW4AAIApxA0AADCFuAEAAKYQNwAAwBTiBgAAmELcAAAAU4gbAABgCnEDAABMIW4AAIApxA0AADCFuAEAAKYQNwAAwBTiBgAAmELcAAAAU4gbAABgCnEDAABMiYi4Wb58uVJTUxUXF6fRo0ersrLypMf/+c9/VlpamuLi4nTBBRdo06ZNYZoUAABEOsfjZuPGjXK73SoqKtK2bds0YsQITZw4UXV1dc0e/8Ybb+jaa6/V9OnTtX37duXk5CgnJ0e7du0K8+QAACASOR43JSUlmjFjhvLz8zV06FCVlpaqS5cuWrNmTbPHL1u2TD/72c90yy23aMiQIbrrrrt00UUX6cEHHwzz5AAAIBI5GjeNjY2qqqpSdnZ2cF9UVJSys7NVUVHR7DkVFRVNjpekiRMnnvB4AADQsXRy8skPHTokn8+npKSkJvuTkpK0e/fuZs+pqalp9viamppmj/d6vfJ6vcHtw4cPS5Lq6+uPO9bnPdaq+SNVc2s7mSNf+UI0SXi1dt1fH/s6RJOEV2vX3fB1x1z3Me+XIZokvFq77q/++98QTRJerV330a8aQjRJeLV23d//+649a27d3+4LBAI/eL6jcRMOxcXFWrBgwXH7U1JSHJgmPBIe+F9Oj+CM4gSnJ3BEwpyOuW4ldMx1/5/lTk/gjIVPdszfby10egBnLF68+IRfO3LkiBJ+4M+/o3HTs2dPRUdHq7a2tsn+2tpa9e7du9lzevfu3arj582bJ7fbHdz2+/3697//rbPPPlsul+s0V9A69fX1SklJ0b59+9StW7ewPreTWDfr7ghYN+vuCJxcdyAQ0JEjR9S3b98fPNbRuImJidHIkSPl8XiUk5Mj6Zv48Hg8KigoaPacrKwseTwezZ49O7jvpZdeUlZWVrPHx8bGKjY2tsm+7t27t8X4p6xbt24d6g/Dt1h3x8K6OxbW3bE4te4fesXmW45flnK73crLy1NGRoYyMzO1dOlSNTQ0KD8/X5I0depUJScnq7i4WJI0a9YsjR8/Xvfdd5+uvPJKbdiwQW+//bZWrlzp5DIAAECEcDxucnNzdfDgQRUWFqqmpkbp6ekqKysL3jRcXV2tqKjvfqhrzJgxWr9+vW677TbdeuutOvfcc/XMM8/o/PPPd2oJAAAggjgeN5JUUFBwwstQ5eXlx+27+uqrdfXVV4d4qrYXGxuroqKi4y6TWce6WXdHwLpZd0fQXtbtCrTkZ6oAAADaCcffoRgAAKAtETcAAMAU4gYAAJhC3ITBq6++qsmTJ6tv375yuVx65plnnB4pLIqLizVq1CjFx8crMTFROTk52rNnj9NjhdyKFSs0fPjw4PtAZGVl6fnnn3d6rLBbvHixXC5Xk/eksuiOO+6Qy+Vq8khLS3N6rLDYv3+/fvvb3+rss89W586ddcEFF+jtt992eqyQSk1NPe732+VyaebMmU6PFlI+n0+33367zjnnHHXu3FmDBg3SXXfd
1aKPQnBCRPy0lHUNDQ0aMWKEpk2bpl/+8pdOjxM2mzdv1syZMzVq1Ch9/fXXuvXWW3X55Zfrvffe05lnnun0eCHTr18/LV68WOeee64CgYAeeeQRXXXVVdq+fbuGDRvm9HhhsXXrVj300EMaPny406OExbBhw/Tyyy8Htzt1sv+/1v/85z8aO3asfvrTn+r5559Xr1699OGHH+qss85yerSQ2rp1q3y+7z6Pb9euXbrsssva5U/wtsY999yjFStW6JFHHtGwYcP09ttvKz8/XwkJCbrpppucHu849v8ERoBJkyZp0qRJTo8RdmVlZU22161bp8TERFVVVeniiy92aKrQmzx5cpPtRYsWacWKFXrzzTc7RNwcPXpUU6ZM0apVq7RwYcf4YJxOnTqd8CNgrLrnnnuUkpKitWvXBvedc845Dk4UHr169WqyvXjxYg0aNEjjx493aKLweOONN3TVVVfpyiuvlPTNK1hPPPGEKisrHZ6seVyWQth8+4nsPXr0cHiS8PH5fNqwYYMaGhpO+BEh1sycOVNXXnmlsrOznR4lbD788EP17dtXAwcO1JQpU1RdXe30SCH33HPPKSMjQ1dffbUSExN14YUXatWqVU6PFVaNjY167LHHNG3atLB/VmG4jRkzRh6PRx988IEk6Z133tGWLVsi9h/uvHKDsPD7/Zo9e7bGjh3bId5NeufOncrKytJXX32lrl276umnn9bQoUOdHivkNmzYoG3btmnr1q1OjxI2o0eP1rp163Teeefp888/14IFCzRu3Djt2rVL8fHxTo8XMv/617+0YsUKud1u3Xrrrdq6datuuukmxcTEKC8vz+nxwuKZZ57RF198oeuvv97pUUJu7ty5qq+vV1pamqKjo+Xz+bRo0SJNmTLF6dGaRdwgLGbOnKldu3Zpy5YtTo8SFuedd5527Nihw4cP6y9/+Yvy8vK0efNm04Gzb98+zZo1Sy+99JLi4uKcHidsvv8v1+HDh2v06NEaMGCAnnzySU2fPt3ByULL7/crIyNDd999tyTpwgsv1K5du1RaWtph4mb16tWaNGlSiz6lur178skn9fjjj2v9+vUaNmyYduzYodmzZ6tv374R+ftN3CDkCgoK9Pe//12vvvqq+vXr5/Q4YRETE6PBgwdLkkaOHKmtW7dq2bJleuihhxyeLHSqqqpUV1eniy66KLjP5/Pp1Vdf1YMPPiiv16vo6GgHJwyP7t2760c/+pE++ugjp0cJqT59+hwX60OGDNFf//pXhyYKr08//VQvv/yynnrqKadHCYtbbrlFc+fO1TXXXCNJuuCCC/Tpp5+quLiYuEHHEggEdOONN+rpp59WeXl5h7jZ8ET8fr+8Xq/TY4TUpZdeqp07dzbZl5+fr7S0NM2ZM6dDhI30zQ3VH3/8sa677jqnRwmpsWPHHvfWDh988IEGDBjg0EThtXbtWiUmJgZvsLXuyy+/bPIh1pIUHR0tv9/v0EQnR9yEwdGjR5v8K+6TTz7Rjh071KNHD/Xv39/ByUJr5syZWr9+vZ599lnFx8erpqZGkpSQkKDOnTs7PF3ozJs3T5MmTVL//v115MgRrV+/XuXl5XrhhRecHi2k4uPjj7uf6swzz9TZZ59t+j6rm2++WZMnT9aAAQN04MABFRUVKTo6Wtdee63To4XUH/7wB40ZM0Z33323fvOb36iyslIrV67UypUrnR4t5Px+v9auXau8vLwO8WP/0jc/Bbpo0SL1799fw4YN0/bt21VSUqJp06Y5PVrzAgi5V155JSDpuEdeXp7To4VUc2uWFFi7dq3To4XUtGnTAgMGDAjExMQEevXqFbj00ksDL774otNjOWL8+PGBWbNmOT1GSOXm5gb69OkTiImJCSQnJwdyc3MDH330kdNjhcXf/va3wPnnnx+IjY0NpKWlBVauXOn0SGHxwgsvBCQF9uzZ4/QoYVNfXx+YNWtWoH///oG4uLjAwIEDA/Pnzw94vV6nR2sWnwoO
AABM4X1uAACAKcQNAAAwhbgBAACmEDcAAMAU4gYAAJhC3AAAAFOIGwAAYApxAwAATCFuAHRoqampWrp0qdNjAGhDxA2AiDFhwgTNnj3b6TEAtHPEDQAAMIW4ARARrr/+em3evFnLli2Ty+WSy+XS3r17tXnzZmVmZio2NlZ9+vTR3Llz9fXXXwfPmzBhggoKClRQUKCEhAT17NlTt99+u071Y/Mefvhhde/eXR6Pp62WBiDMiBsAEWHZsmXKysrSjBkz9Pnnn+vzzz/XGWecoSuuuEKjRo3SO++8oxUrVmj16tVauHBhk3MfeeQRderUSZWVlVq2bJlKSkr08MMPt3qGe++9V3PnztWLL76oSy+9tK2WBiDMOjk9AABIUkJCgmJiYtSlSxf17t1bkjR//nylpKTowQcflMvlUlpamg4cOKA5c+aosLBQUVHf/PssJSVF999/v1wul8477zzt3LlT999/v2bMmNHi558zZ44effRRbd68WcOGDQvJGgGEB6/cAIhY77//vrKysuRyuYL7xo4dq6NHj+qzzz4L7vvxj3/c5JisrCx9+OGH8vl8LXqe++67T6tWrdKWLVsIG8AA4gZAhzdu3Dj5fD49+eSTTo8CoA0QNwAiRkxMTJNXW4YMGaKKioomNwe//vrrio+PV79+/YL73nrrrSbf580339S5556r6OjoFj1vZmamnn/+ed19991asmTJaa4CgNOIGwARIzU1VW+99Zb27t2rQ4cO6YYbbtC+fft04403avfu3Xr22WdVVFQkt9sdvN9Gkqqrq+V2u7Vnzx498cQTeuCBBzRr1qxWPfeYMWO0adMmLViwgDf1A9o5bigGEDFuvvlm5eXlaejQoTp27Jg++eQTbdq0SbfccotGjBihHj16aPr06brtttuanDd16lQdO3ZMmZmZio6O1qxZs/T73/++1c//k5/8RP/4xz90xRVXKDo6WjfeeGNbLQ1AGLkCp/pmEAAQASZMmKD09HRebQEQxGUpAABgCnEDwKzXXntNXbt2PeEDgE1clgJg1rFjx7R///4Tfn3w4MFhnAZAuBA3AADAFC5LAQAAU4gbAABgCnEDAABMIW4AAIApxA0AADCFuAEAAKYQNwAAwBTiBgAAmPL/AbLS1gqEWRCyAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 640x480 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "import seaborn as sns\n",
    "\n",
    "sns.barplot(x='top_k', y='hit', data=hit_stat_df, errorbar=None)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "7925564a-7d30-4914-baaf-4a00abb7686d",
   "metadata": {
    "papermill": {
     "duration": 0.109216,
     "end_time": "2024-11-23T14:35:26.464009",
     "exception": false,
     "start_time": "2024-11-23T14:35:26.354793",
     "status": "completed"
    },
    "tags": []
   },
   "source": [
    "# 生成答案"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "id": "27132c3b-0051-4df6-bf57-fd804acb8d17",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T13:35:35.365357Z",
     "iopub.status.busy": "2024-12-04T13:35:35.365059Z",
     "iopub.status.idle": "2024-12-04T13:35:35.437719Z",
     "shell.execute_reply": "2024-12-04T13:35:35.437257Z",
     "shell.execute_reply.started": "2024-12-04T13:35:35.365343Z"
    },
    "papermill": {
     "duration": 0.199165,
     "end_time": "2024-11-23T14:35:27.323500",
     "exception": false,
     "start_time": "2024-11-23T14:35:27.124335",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_1211099/3342461511.py:3: LangChainDeprecationWarning: The class `Ollama` was deprecated in LangChain 0.3.1 and will be removed in 1.0.0. An updated version of the class exists in the :class:`~langchain-ollama package and should be used instead. To use it run `pip install -U :class:`~langchain-ollama` and import as `from :class:`~langchain_ollama import OllamaLLM``.\n",
      "  ollama_llm = Ollama(\n"
     ]
    }
   ],
   "source": [
    "from langchain.llms import Ollama\n",
    "\n",
    "ollama_llm = Ollama(\n",
    "    model='qwen2:7b-instruct',\n",
    "    base_url='http://localhost:11434',\n",
    "    top_k=1\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "id": "50404beb-3be0-4aaa-b124-8c7a52b84531",
   "metadata": {
    "editable": true,
    "execution": {
     "iopub.execute_input": "2024-12-04T13:35:35.438346Z",
     "iopub.status.busy": "2024-12-04T13:35:35.438212Z",
     "iopub.status.idle": "2024-12-04T13:35:35.442598Z",
     "shell.execute_reply": "2024-12-04T13:35:35.442223Z",
     "shell.execute_reply.started": "2024-12-04T13:35:35.438333Z"
    },
    "papermill": {
     "duration": 0.159318,
     "end_time": "2024-11-23T14:35:26.768506",
     "exception": false,
     "start_time": "2024-11-23T14:35:26.609188",
     "status": "completed"
    },
    "slideshow": {
     "slide_type": ""
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "import time\n",
    "\n",
    "def rag(vector_db, llm, query, n_chunks=4):\n",
    "    \"\"\"\n",
    "    Answer `query` using retrieval-augmented generation.\n",
    "\n",
    "    Retrieves up to `n_chunks` documents from `vector_db`, substitutes their text and the\n",
    "    question into the prompt template, and calls `llm.invoke`. The call is attempted at most\n",
    "    3 times, waiting 4s and then 8s between attempts.\n",
    "\n",
    "    :param vector_db: vector store exposing `similarity_search(query, k=...)`\n",
    "    :param llm: model object exposing `invoke(prompt)`\n",
    "    :param query: question text\n",
    "    :param n_chunks: number of chunks to retrieve\n",
    "    :return: tuple (response, chunks); response is '' when every attempt failed\n",
    "    \"\"\"\n",
    "    prompt_tmpl = \"\"\"\n",
    "你是一个金融分析师，擅长根据所获取的信息片段，对问题进行分析和推理。\n",
    "你的任务是根据所获取的信息片段（<<<<context>>><<<</context>>>之间的内容）回答问题。\n",
    "回答保持简洁，不必重复问题，不要添加描述性解释和与答案无关的任何内容。\n",
    "已知信息：\n",
    "<<<<context>>>\n",
    "{{knowledge}}\n",
    "<<<</context>>>\n",
    "\n",
    "问题：{{query}}\n",
    "请回答：\n",
    "\"\"\".strip()\n",
    "    chunks = vector_db.similarity_search(query, k=n_chunks)\n",
    "    knowledge = '\\n\\n'.join(doc.page_content for doc in chunks)\n",
    "    prompt = prompt_tmpl.replace('{{knowledge}}', knowledge).replace('{{query}}', query)\n",
    "\n",
    "    resp = ''\n",
    "    retry_count = 3\n",
    "    while retry_count > 0:\n",
    "        try:\n",
    "            resp = llm.invoke(prompt)\n",
    "            break\n",
    "        except Exception as e:\n",
    "            retry_count -= 1\n",
    "            if retry_count == 0:\n",
    "                # Out of attempts: report and return the empty answer without a pointless final sleep.\n",
    "                print(f\"query={query}, error={e}, giving up, remaining retry count={retry_count}\")\n",
    "                break\n",
    "            # Exponential backoff: 4s after the first failure, 8s after the second.\n",
    "            sleeping_seconds = 2 ** (4 - retry_count)\n",
    "            print(f\"query={query}, error={e}, sleeping={sleeping_seconds}, remaining retry count={retry_count}\")\n",
    "            time.sleep(sleeping_seconds)\n",
    "\n",
    "    return resp, chunks"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "95e5a804-2dc6-411c-ba71-6ccf765b2b73",
   "metadata": {
    "papermill": {
     "duration": 0.135973,
     "end_time": "2024-11-23T14:35:27.001401",
     "exception": false,
     "start_time": "2024-11-23T14:35:26.865428",
     "status": "completed"
    },
    "tags": []
   },
   "source": [
    "## 预测"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "id": "166392d8-f801-4372-b8ad-3e79aef0b350",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T13:35:35.443197Z",
     "iopub.status.busy": "2024-12-04T13:35:35.443067Z",
     "iopub.status.idle": "2024-12-04T13:35:35.449830Z",
     "shell.execute_reply": "2024-12-04T13:35:35.449411Z",
     "shell.execute_reply.started": "2024-12-04T13:35:35.443185Z"
    },
    "papermill": {
     "duration": 0.141864,
     "end_time": "2024-11-23T14:35:27.564409",
     "exception": false,
     "start_time": "2024-11-23T14:35:27.422545",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "prediction_df = qa_df[qa_df['dataset'] == 'test'][['uuid', 'question', 'qa_type', 'answer']].rename(columns={'answer': 'ref_answer'})\n",
    "\n",
    "def predict(vector_db, llm, prediction_df, n_chunks):\n",
    "    \"\"\"\n",
    "    Run `rag` on every row of `prediction_df` and attach the results.\n",
    "\n",
    "    :param vector_db: vector store passed through to `rag`\n",
    "    :param llm: model passed through to `rag`\n",
    "    :param prediction_df: DataFrame with a `question` column\n",
    "    :param n_chunks: number of chunks to retrieve per question\n",
    "    :return: copy of `prediction_df` with `gen_answer` and `chunks` columns added\n",
    "    \"\"\"\n",
    "    prediction_df = prediction_df.copy()\n",
    "    gen_answers = []\n",
    "    retrieved_chunks = []\n",
    "\n",
    "    for _, row in tqdm(prediction_df.iterrows(), total=len(prediction_df)):\n",
    "        answer, chunks = rag(vector_db, llm, row['question'], n_chunks=n_chunks)\n",
    "        assert len(chunks) <= n_chunks\n",
    "        gen_answers.append(answer)\n",
    "        retrieved_chunks.append(chunks)\n",
    "\n",
    "    # Results are kept in row order and aligned on the index, so rows that share the same\n",
    "    # question text cannot overwrite each other (which keying by question text allowed).\n",
    "    prediction_df['gen_answer'] = pd.Series(gen_answers, index=prediction_df.index, dtype=object)\n",
    "    prediction_df['chunks'] = pd.Series(retrieved_chunks, index=prediction_df.index, dtype=object)\n",
    "\n",
    "    return prediction_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "id": "ca46d5f1-e698-457d-abb6-92d83cd59c66",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T13:35:35.450388Z",
     "iopub.status.busy": "2024-12-04T13:35:35.450264Z",
     "iopub.status.idle": "2024-12-04T13:40:49.307328Z",
     "shell.execute_reply": "2024-12-04T13:40:49.306804Z",
     "shell.execute_reply.started": "2024-12-04T13:35:35.450377Z"
    },
    "papermill": {
     "duration": 514.92352,
     "end_time": "2024-11-23T14:44:02.805529",
     "exception": false,
     "start_time": "2024-11-23T14:35:27.882009",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "cc96170ba9044140a228afe40d30b8ac",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/100 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "pred_df = predict(md_vector_db, ollama_llm, prediction_df, n_chunks=3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "id": "f7026bac-9927-4a33-85c0-bc1b35f3a603",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T13:40:49.307945Z",
     "iopub.status.busy": "2024-12-04T13:40:49.307817Z",
     "iopub.status.idle": "2024-12-04T13:40:49.525707Z",
     "shell.execute_reply": "2024-12-04T13:40:49.525142Z",
     "shell.execute_reply.started": "2024-12-04T13:40:49.307933Z"
    }
   },
   "outputs": [],
   "source": [
    "save_path = os.path.join(expr_dir, 'predictions.xlsx')\n",
    "pred_df.to_excel(save_path, index=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "7d79e974-089f-4c08-ba5e-804f6542e06a",
   "metadata": {
    "papermill": {
     "duration": 0.14423,
     "end_time": "2024-11-23T14:44:03.513124",
     "exception": false,
     "start_time": "2024-11-23T14:44:03.368894",
     "status": "completed"
    },
    "tags": []
   },
   "source": [
    "# 评估"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "id": "217568fe-c0e4-49eb-9a7c-9fdfbc033d8a",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T13:40:49.526609Z",
     "iopub.status.busy": "2024-12-04T13:40:49.526435Z",
     "iopub.status.idle": "2024-12-04T13:40:49.743688Z",
     "shell.execute_reply": "2024-12-04T13:40:49.743191Z",
     "shell.execute_reply.started": "2024-12-04T13:40:49.526593Z"
    },
    "papermill": {
     "duration": 0.369729,
     "end_time": "2024-11-23T14:44:04.017198",
     "exception": false,
     "start_time": "2024-11-23T14:44:03.647469",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "from langchain_openai import ChatOpenAI\n",
    "import time\n",
    "\n",
    "judge_llm = ChatOpenAI(\n",
    "    api_key=os.environ['LLM_API_KEY'],\n",
    "    base_url=os.environ['LLM_BASE_URL'],\n",
    "    model_name='qwen2-72b-instruct',\n",
    "    temperature=0\n",
    ")\n",
    "\n",
    "def evaluate(prediction_df):\n",
    "    \"\"\"\n",
    "    Score each prediction with the judge model.\n",
    "\n",
    "    :param prediction_df: predictions; must contain the columns question, ref_answer, gen_answer\n",
    "    :return: list with the judge model's raw reply for each row, in row order ('' when every attempt failed)\n",
    "    \"\"\"\n",
    "    prompt_tmpl = \"\"\"\n",
    "你是一个经济学博士，现在我有一系列问题，有一个助手已经对这些问题进行了回答，你需要参照参考答案，评价这个助手的回答是否正确，仅回复“是”或“否”即可，不要带其他描述性内容或无关信息。\n",
    "问题：\n",
    "<question>\n",
    "{{question}}\n",
    "</question>\n",
    "\n",
    "参考答案：\n",
    "<ref_answer>\n",
    "{{ref_answer}}\n",
    "</ref_answer>\n",
    "\n",
    "助手回答：\n",
    "<gen_answer>\n",
    "{{gen_answer}}\n",
    "</gen_answer>\n",
    "请评价：\n",
    "    \"\"\"\n",
    "    results = []\n",
    "\n",
    "    for _, row in tqdm(prediction_df.iterrows(), total=len(prediction_df)):\n",
    "        question = row['question']\n",
    "        ref_answer = row['ref_answer']\n",
    "        gen_answer = row['gen_answer']\n",
    "\n",
    "        # str() keeps non-string cells (e.g. numeric answers) from breaking str.replace.\n",
    "        prompt = prompt_tmpl.replace('{{question}}', question).replace('{{ref_answer}}', str(ref_answer)).replace('{{gen_answer}}', str(gen_answer)).strip()\n",
    "\n",
    "        retry_count = 3\n",
    "        result = ''\n",
    "\n",
    "        while retry_count > 0:\n",
    "            try:\n",
    "                result = judge_llm.invoke(prompt).content\n",
    "                break\n",
    "            except Exception as e:\n",
    "                retry_count -= 1\n",
    "                # Log the current `question`: this function has no `query` variable, so the\n",
    "                # previous message raised NameError inside the handler and aborted the retry.\n",
    "                if retry_count == 0:\n",
    "                    # Out of attempts: record '' for this row without a pointless final sleep.\n",
    "                    print(f\"query={question}, error={e}, giving up, remaining retry count={retry_count}\")\n",
    "                    break\n",
    "                # Exponential backoff: 4s after the first failure, 8s after the second.\n",
    "                sleeping_seconds = 2 ** (4 - retry_count)\n",
    "                print(f\"query={question}, error={e}, sleeping={sleeping_seconds}, remaining retry count={retry_count}\")\n",
    "\n",
    "                time.sleep(sleeping_seconds)\n",
    "\n",
    "        results.append(result)\n",
    "\n",
    "        # Fixed 1s pause between rows (presumably to stay under the judge API rate limit - TODO confirm).\n",
    "        time.sleep(1)\n",
    "    return results"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "id": "71db81af-b8f9-47ba-958b-761896516605",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T13:40:49.744253Z",
     "iopub.status.busy": "2024-12-04T13:40:49.744127Z",
     "iopub.status.idle": "2024-12-04T13:43:23.508426Z",
     "shell.execute_reply": "2024-12-04T13:43:23.506071Z",
     "shell.execute_reply.started": "2024-12-04T13:40:49.744241Z"
    },
    "papermill": {
     "duration": 150.566109,
     "end_time": "2024-11-23T14:46:34.714324",
     "exception": false,
     "start_time": "2024-11-23T14:44:04.148215",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "70fca36d98864f548ffce6ed461d9461",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/100 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "pred_df['raw_score'] = evaluate(pred_df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "id": "7da1b98e-99aa-4e11-9297-91eac1c62493",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T13:43:23.511624Z",
     "iopub.status.busy": "2024-12-04T13:43:23.510816Z",
     "iopub.status.idle": "2024-12-04T13:43:23.525209Z",
     "shell.execute_reply": "2024-12-04T13:43:23.523671Z",
     "shell.execute_reply.started": "2024-12-04T13:43:23.511551Z"
    },
    "papermill": {
     "duration": 0.138037,
     "end_time": "2024-11-23T14:46:35.040595",
     "exception": false,
     "start_time": "2024-11-23T14:46:34.902558",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array(['是', '否'], dtype=object)"
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pred_df['raw_score'].unique()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "id": "2c99c078-d294-40b8-b57b-31cfd7349c3e",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T13:43:23.527638Z",
     "iopub.status.busy": "2024-12-04T13:43:23.527130Z",
     "iopub.status.idle": "2024-12-04T13:43:23.536449Z",
     "shell.execute_reply": "2024-12-04T13:43:23.535541Z",
     "shell.execute_reply.started": "2024-12-04T13:43:23.527590Z"
    },
    "papermill": {
     "duration": 0.107466,
     "end_time": "2024-11-23T14:46:35.243603",
     "exception": false,
     "start_time": "2024-11-23T14:46:35.136137",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "pred_df['score'] = (pred_df['raw_score'] == '是').astype(int)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "id": "423897f2-786e-415b-a613-55a4359faf76",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T13:43:23.537712Z",
     "iopub.status.busy": "2024-12-04T13:43:23.537419Z",
     "iopub.status.idle": "2024-12-04T13:43:23.544513Z",
     "shell.execute_reply": "2024-12-04T13:43:23.544000Z",
     "shell.execute_reply.started": "2024-12-04T13:43:23.537684Z"
    },
    "papermill": {
     "duration": 0.094328,
     "end_time": "2024-11-23T14:46:35.431162",
     "exception": false,
     "start_time": "2024-11-23T14:46:35.336834",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.37"
      ]
     },
     "execution_count": 34,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pred_df['score'].mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "id": "79325429-9cf1-4e2c-95ac-cb0c1a3b6156",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T13:43:23.545390Z",
     "iopub.status.busy": "2024-12-04T13:43:23.545185Z",
     "iopub.status.idle": "2024-12-04T13:43:23.741457Z",
     "shell.execute_reply": "2024-12-04T13:43:23.740860Z",
     "shell.execute_reply.started": "2024-12-04T13:43:23.545372Z"
    },
    "papermill": {
     "duration": 0.289336,
     "end_time": "2024-11-23T14:46:35.804651",
     "exception": false,
     "start_time": "2024-11-23T14:46:35.515315",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "save_path = os.path.join(expr_dir, 'eval_df.xlsx')\n",
    "pred_df.to_excel(save_path, index=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e9264087-cea4-4131-98eb-7875b0cbaddf",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T08:01:27.295186Z",
     "iopub.status.busy": "2024-12-04T08:01:27.294401Z",
     "iopub.status.idle": "2024-12-04T08:01:27.302698Z",
     "shell.execute_reply": "2024-12-04T08:01:27.301299Z",
     "shell.execute_reply.started": "2024-12-04T08:01:27.295113Z"
    }
   },
   "source": [
    "# 结果分析"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "0806dc6c-a009-4f4e-92d5-0c21cd244bd2",
   "metadata": {},
   "source": [
    "结果非常差，前面我们统计了不同切片的文本长度，有大量的文本都超长了，可能带来的问题就是超长内容无法有效索引，影响索引效果，进而影响最终答案\n",
    "\n",
    "下面分析回答正确和回答错误的问题各自对应的上下文长度"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "id": "88fc7227-9c21-48da-b179-5070406eb113",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T13:43:23.742084Z",
     "iopub.status.busy": "2024-12-04T13:43:23.741925Z",
     "iopub.status.idle": "2024-12-04T13:43:23.746759Z",
     "shell.execute_reply": "2024-12-04T13:43:23.746209Z",
     "shell.execute_reply.started": "2024-12-04T13:43:23.742068Z"
    },
    "papermill": {
     "duration": 0.088622,
     "end_time": "2024-11-23T14:46:36.016801",
     "exception": false,
     "start_time": "2024-11-23T14:46:35.928179",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Character length of every chunk retrieved for each question.\n",
    "chunk_lens = pred_df['chunks'].apply(lambda chunks: [len(d.page_content) for d in chunks])\n",
    "# Rows with no retrieved chunks get 0 instead of raising ZeroDivisionError / ValueError.\n",
    "pred_df['avg_chunk_len'] = chunk_lens.apply(lambda lens: sum(lens) / len(lens) if lens else 0.0)\n",
    "pred_df['max_chunk_len'] = chunk_lens.apply(lambda lens: max(lens, default=0))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "id": "fb1a3e72-cb28-419a-8f6c-a0ef5d34c67a",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T13:43:23.747525Z",
     "iopub.status.busy": "2024-12-04T13:43:23.747292Z",
     "iopub.status.idle": "2024-12-04T13:43:23.755934Z",
     "shell.execute_reply": "2024-12-04T13:43:23.755503Z",
     "shell.execute_reply.started": "2024-12-04T13:43:23.747508Z"
    }
   },
   "outputs": [],
   "source": [
    "import seaborn as sns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "id": "ff2413c8-f7a7-4e70-9178-c42ff2427426",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T13:43:23.759443Z",
     "iopub.status.busy": "2024-12-04T13:43:23.756359Z",
     "iopub.status.idle": "2024-12-04T13:43:23.833291Z",
     "shell.execute_reply": "2024-12-04T13:43:23.832861Z",
     "shell.execute_reply.started": "2024-12-04T13:43:23.759419Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<Axes: xlabel='score', ylabel='avg_chunk_len'>"
      ]
     },
     "execution_count": 38,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAkQAAAGwCAYAAABIC3rIAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/TGe4hAAAACXBIWXMAAA9hAAAPYQGoP6dpAAArFUlEQVR4nO3deXRUZYL+8aeSkKoASTBAUskQQ4QjawAFJuSIGZZMQowKiPYguLSijHSCArYs0wgM7cg0Sgsu7TIu2Kdl0J4WW4EgMUAiEFBpwybSgGEChiQMS0IYErLU7w+b+lkCmikqdSu83885dUzd+9at56VOzFP33rplc7lcLgEAABgsyOoAAAAAVqMQAQAA41GIAACA8ShEAADAeBQiAABgPAoRAAAwHoUIAAAYL8TqAK1FU1OTysrKFB4eLpvNZnUcAADQDC6XS2fOnFFcXJyCgi6/H4hC1ExlZWWKj4+3OgYAAPDCkSNH1KVLl8uupxA1U3h4uKTv/kEjIiIsTgMAAJqjurpa8fHx7r/jl0MhaqYLh8kiIiIoRAAAtDI/dboLJ1UDAADjUYgAAIDxKEQAAMB4FCIAAGA8ChEAADAehQgAABiPQgQAAIxHIQIAAMajEAEAAONRiAAAgPEsLUSLFi3S4MGDFR4erujoaI0ZM0b79+/3GDNs2DDZbDaP2yOPPOIxprS0VFlZWWrbtq2io6P1xBNPqKGhwWPMpk2bdOONN8put6t79+5avnx5S08PAAC0EpYWooKCAmVnZ2vbtm3Ky8tTfX290tPTdfbsWY9xDz/8sI4dO+a+LV682L2usbFRWVlZOn/+vLZu3aq3335by5cv17x589xjSkpKlJWVpeHDh6u4uFjTpk3TQw89pI8//thvcwUAAIHL5nK5XFaHuOD48eOKjo5WQUGBUlNTJX23h2jAgAFaunTpJR+Tm5urW2+9VWVlZYqJiZEkvfLKK5o1a5aOHz+u0NBQzZo1S2vWrNGePXvcjxs/frxOnz6tdevWXXK7dXV1qqurc9+/8G25VVVVfLnrZbhcLtXW1lod44q5XC73a2+323/yCwEDncPhaPVzAABvVVdXKzIy8if/fgfUt91XVVVJkqKiojyWv/POO/rDH/4gp9Op2267TU8++aTatm0rSSoqKlJSUpK7DElSRkaGpkyZor179+qGG25QUVGR0tLSPLaZkZGhadOmXTbLokWL9K//+q8+mpkZamtrlZmZaXUM/EBubq7CwsKsjgEAAS1gClFTU5OmTZumm266SX379nUvnzBhghISEhQXF6ddu3Zp1qxZ2r9/v95//31JUnl5uUcZkuS+X15e/qNjqqurde7cuUv+sZgzZ45mzJjhvn9hDxEAALj6BEwhys7O1p49e7R582aP5ZMnT3b/nJSUpNjYWI0cOVKHDh1St27dWiyP3W6X3W5vse1fjRwOh3Jzc62OccVqa2s1duxYSdKqVavkcDgsTnRlWnt+APCHgChEOTk5Wr16tQoLC9WlS5cfHZucnCxJOnjwoLp16yan06nPPvvMY0xFRYUkyel0uv97Ydn3x0RERHAowYdsNttV9+/pcDiuujkBAC5m6afMXC6XcnJytGrVKm3YsEGJiYk/+Zji4mJJUmxsrCQpJSVFu3fvVmVlpXtMXl6eIiIi1Lt3b/eY/Px8j+3k5eUpJSXFRzMBAACtmaWFKDs7W3/4wx+0YsUKhYeHq7y8XOXl5Tp37pwk6dChQ/r1r3+tHTt26PDhw/rwww913333KTU1Vf369ZMkpaenq3fv3rr33nu1c+dOffzxx5o7d66ys7Pdh7weeeQRffPNN5o5c6a+/vpr/e53v9N7772n6dOnWzZ3AAAQOCwtRC+//LKqqqo0bNgwxcbGum/vvvuuJCk0NFSffPKJ0tPT1bNnTz3++OMaN26cPvroI/c2goODtXr1agUHByslJUX33HOP7rvvPi1cuNA9JjExUWvW
rFFeXp769++vJUuW6PXXX1dGRobf5wwAAAJPQF2HKJA19zoGaP3OnTvnvnwAH1kHgNatuX+/+S4zAABgPAoRAAAwHoUIAAAYj0IEAACMRyECAADGoxABAADjUYgAAIDxKEQAAMB4FCIAAGA8ChEAADAehQgAABiPQgQAAIxHIQIAAMajEAEAAONRiAAAgPEoRAAAwHgUIgAAYDwKEQAAMB6FCAAAGI9CBAAAjEchAgAAxqMQAQAA41GIAACA8ShEAADAeBQiAABgPAoRAAAwHoUIAAAYj0IEAACMRyECAADGoxABAADjUYgAAIDxKEQAAMB4FCIAAGA8ChEAADAehQgAABiPQgQAAIxHIQIAAMajEAEAAONRiAAAgPEoRAAAwHgUIgAAYDwKEQAAMB6FCAAAGI9CBAAAjEchAgAAxqMQAQAA41GIAACA8ShEAADAeBQiAABgPAoRAAAwHoUIAAAYj0IEAACMRyECAADGoxABAADjUYgAAIDxKEQAAMB4FCIAAGA8ChEAADAehQgAABiPQgQAAIxHIQIAAMajEAEAAONZWogWLVqkwYMHKzw8XNHR0RozZoz279/vMaa2tlbZ2dnq2LGj2rdvr3HjxqmiosJjTGlpqbKystS2bVtFR0friSeeUENDg8eYTZs26cYbb5Tdblf37t21fPnylp4eAABoJSwtRAUFBcrOzta2bduUl5en+vp6paen6+zZs+4x06dP10cffaQ//vGPKigoUFlZme644w73+sbGRmVlZen8+fPaunWr3n77bS1fvlzz5s1zjykpKVFWVpaGDx+u4uJiTZs2TQ899JA+/vhjv84XAAAEJpvL5XJZHeKC48ePKzo6WgUFBUpNTVVVVZU6d+6sFStW6M4775Qkff311+rVq5eKioo0ZMgQ5ebm6tZbb1VZWZliYmIkSa+88opmzZql48ePKzQ0VLNmzdKaNWu0Z88e93ONHz9ep0+f1rp16y6Zpa6uTnV1de771dXVio+PV1VVlSIiIlrwXwFWO3funDIzMyVJubm5CgsLszgRAMBb1dXVioyM/Mm/3wF1DlFVVZUkKSoqSpK0Y8cO1dfXKy0tzT2mZ8+euvbaa1VUVCRJKioqUlJSkrsMSVJGRoaqq6u1d+9e95jvb+PCmAvbuJRFixYpMjLSfYuPj/fNJAEAQMAJmELU1NSkadOm6aabblLfvn0lSeXl5QoNDVWHDh08xsbExKi8vNw95vtl6ML6C+t+bEx1dbXOnTt3yTxz5sxRVVWV+3bkyJErniMAAAhMIVYHuCA7O1t79uzR5s2brY4iSbLb7bLb7VbHAAAAfhAQe4hycnK0evVqbdy4UV26dHEvdzqdOn/+vE6fPu0xvqKiQk6n0z3mh586u3D/p8ZERERwfggAALC2ELlcLuXk5GjVqlXasGGDEhMTPdYPHDhQbdq0UX5+vnvZ/v37VVpaqpSUFElSSkqKdu/ercrKSveYvLw8RUREqHfv3u4x39/GhTEXtgEAAMxm6SGz7OxsrVixQn/+858VHh7uPucnMjJSYWFhioyM1KRJkzRjxgxFRUUpIiJCU6dOVUpKioYMGSJJSk9PV+/evXXvvfdq8eLFKi8v19y5c5Wdne0+5PXII4/oxRdf1MyZM/Xggw9qw4YNeu+997RmzRrL5g4AAAKHpXuIXn75ZVVVVWnYsGGKjY1139599133mOeee0633nqrxo0bp9TUVDmdTr3//vvu9cHBwVq9erWCg4OVkpKie+65R/fdd58WLlzoHpOYmKg1a9YoLy9P/fv315IlS/T6668rIyPDr/MFAACBKaCuQxTImnsdA7R+XIcIAK4erfI6RAAAAFagEAEAAONRiAAAgPEoRAAAwHgUIgAAYDwKEQAAMB6FCAAAGI9CBAAAjEchAgAAxqMQAQAA41GIAACA8ShEAADAeBQiAABgPAoRAAAwHoUIAAAYj0IEAACMRyECAADGoxABAADjUYgAAIDxKEQA
AMB4FCIAAGA8ChEAADAehQgAABiPQgQAAIxHIQIAAMajEAEAAONRiAAAgPEoRAAAwHgUIgAAYDwKEQAAMB6FCAAAGI9CBAAAjEchAgAAxqMQAQAA41GIAACA8ShEAADAeBQiAABgPAoRAAAwHoUIAAAYj0IEAACMF+LNgxobG7V8+XLl5+ersrJSTU1NHus3bNjgk3AAAAD+4FUheuyxx7R8+XJlZWWpb9++stlsvs4FAADgN14VopUrV+q9997TLbfc4us8AAAAfufVOUShoaHq3r27r7MAAABYwqtC9Pjjj2vZsmVyuVy+zgMAAOB3Xh0y27x5szZu3Kjc3Fz16dNHbdq08Vj//vvv+yQcAACAP3hViDp06KCxY8f6OgsAAIAlvCpEb731lq9zAAAAWMbrCzM2NDTok08+0auvvqozZ85IksrKylRTU+OzcAAAAP7g1R6i//7v/9aoUaNUWlqquro6/eM//qPCw8P1m9/8RnV1dXrllVd8nRMAAKDFeLWH6LHHHtOgQYN06tQphYWFuZePHTtW+fn5PgsHAADgD17tIfr000+1detWhYaGeizv2rWrvv32W58EAwAA8Bev9hA1NTWpsbHxouVHjx5VeHj4FYcCAADwJ68KUXp6upYuXeq+b7PZVFNTo/nz5/N1HgAAoNXx6pDZkiVLlJGRod69e6u2tlYTJkzQgQMH1KlTJ/3nf/6nrzMCAAC0KK8KUZcuXbRz506tXLlSu3btUk1NjSZNmqSJEyd6nGQNALg6uFwu1dbWWh3jirlcLtXV1UmS7Ha7bDabxYmujMPhaPVzCBReFSJJCgkJ0T333OPLLACAAFVbW6vMzEyrY+AHcnNz2RHhI80uRB9++GGzN3r77bd7FQYAAMAKzS5EY8aMadY4m812yU+gAQBaL4fDodzcXKtjXLHa2lr3d3GuWrVKDofD4kRXprXnDyTNLkRNTU0tmcNoV8ux+avF918LXpfAwbkS1rLZbFfdoRmHw3HVzQne8/ocouZISkrS2rVrFR8f35JP0+pxbD5wXXgnCetxrgSAluT1l7s2x+HDh1VfX9+STwEAAHDFWnQP0U8pLCzUM888ox07dujYsWNatWqVx7lKP//5z/X22297PCYjI0Pr1q1z3z958qSmTp2qjz76SEFBQRo3bpyWLVum9u3bu8fs2rVL2dnZ+vzzz9W5c2dNnTpVM2fObPH5eaNmwN1yBVn6ssDlkpoavvs5KETiMI1lbE0Nal/Mtc0AtDxL//KePXtW/fv314MPPqg77rjjkmNGjRqlt956y33fbrd7rJ84caKOHTumvLw81dfX64EHHtDkyZO1YsUKSVJ1dbXS09OVlpamV155Rbt379aDDz6oDh06aPLkyS03OS+5gkKk4DZWx4BCf3oIWpzL6gAAjGFpIcrMzPzJc2fsdrucTucl1+3bt0/r1q3T559/rkGDBkmSXnjhBd1yyy169tlnFRcXp3feeUfnz5/Xm2++qdDQUPXp00fFxcX67W9/+6OFqK6uzn3xLum7YgUAAK5OLXoOkS9s2rRJ0dHR6tGjh6ZMmaITJ0641xUVFalDhw7uMiRJaWlpCgoK0vbt291jUlNTFRr6/9/xZ2RkaP/+/Tp16tRln3fRokWKjIx03zgxHACAq1dAF6JRo0bp97//vfLz8/Wb3/xGBQUFyszMdF/nqLy8XNHR0R6PCQkJUVRUlMrLy91jYmJiPMZcuH9hzKXMmTNHVVVV7tuRI0d8OTUAABBAvDpkdvToUXXp0uWS67Zt26YhQ4ZIkl599dWLysj/xfjx490/JyUlqV+/furWrZs2bdqkkSNHer3d5rDb7RedrwQAAK5OXu0hSk9P18mTJy9avmXLFo0aNcp9f8KECWrXrp336X7guuuuU6dOnXTw4EFJktPpVGVlpceYhoYGnTx50n3ekdPpVEVFhceYC/cvd24SAAAwi1eFaMiQIUpPT9eZM2fcywoLC3XLLbdo/vz5
Pgv3Q0ePHtWJEycUGxsrSUpJSdHp06e1Y8cO95gNGzaoqalJycnJ7jGFhYUe10PKy8tTjx49dM0117RYVgAA0Hp4VYhef/11XXvttbrttttUV1enjRs3KisrSwsXLtT06dObvZ2amhoVFxeruLhYklRSUqLi4mKVlpaqpqZGTzzxhLZt26bDhw8rPz9fo0ePVvfu3ZWRkSFJ6tWrl0aNGqWHH35Yn332mbZs2aKcnByNHz9ecXFxkr7bSxUaGqpJkyZp7969evfdd7Vs2TLNmDHDm6kDAICrkFeFKCgoSCtXrlSbNm00YsQI3X777Vq0aJEee+yx/9N2vvjiC91www264YYbJEkzZszQDTfcoHnz5ik4OFi7du3S7bffruuvv16TJk3SwIED9emnn3qc2/POO++oZ8+eGjlypG655RYNHTpUr732mnt9ZGSk1q9fr5KSEg0cOFCPP/645s2bF5DXIAIAANZo9knVu3btumjZggULdPfdd+uee+5Ramqqe0y/fv2atc1hw4bJ5br8pdc+/vjjn9xGVFSU+yKMl9OvXz99+umnzcoEAADM0+xCNGDAANlsNo8Cc+H+q6++qtdee00ul0s2m839sXgAAIDWoNmFqKSkpCVzAAAAWKbZhSghIaElcwAAAFjG6+8yO3DggDZu3KjKyko1NTV5rJs3b94VBwMAAPAXrwrRf/zHf2jKlCnq1KmTnE6nbDabe53NZqMQAQCAVsWrQvTUU0/p3/7t3zRr1ixf5wEAAPA7r65DdOrUKd11112+zgIAAGAJrwrRXXfdpfXr1/s6CwAAgCW8OmTWvXt3Pfnkk9q2bZuSkpLUpk0bj/WPPvqoT8IBAAD4g1eF6LXXXlP79u1VUFCggoICj3U2m41CBAAAWhWvChEXaQQAAFcTr84hAgAAuJp4tYfowQcf/NH1b775pldhAAAArOBVITp16pTH/fr6eu3Zs0enT5/WiBEjfBIMAADAX7wqRKtWrbpoWVNTk6ZMmaJu3bpdcSgAAAB/8tk5REFBQZoxY4aee+45X20SAADAL3x6UvWhQ4fU0NDgy00CAAC0OK8Omc2YMcPjvsvl0rFjx7RmzRrdf//9PgkGAADgL14Voi+//NLjflBQkDp37qwlS5b85CfQAAAAAo1XhWjjxo2+zgEAAGAZLswIAACM51Uhqqio0L333qu4uDiFhIQoODjY4wYAANCaeHXI7Oc//7lKS0v15JNPKjY2Vjabzde5AAAA/MarQrR582Z9+umnGjBggI/jAAAA+J9Xh8zi4+Plcrl8nQUAAMASXhWipUuXavbs2Tp8+LCP4wAAAPhfsw+ZXXPNNR7nCp09e1bdunVT27Zt1aZNG4+xJ0+e9F1CAACAFtbsQrR06dIWjAEAAGCdZhcivpIDAABcrbw6h2jt2rX6+OOPL1q+fv165ebmXnEoAAAAf/KqEM2ePVuNjY0XLW9qatLs2bOvOBQAAIA/eVWIDhw4oN69e1+0vGfPnjp48OAVhwIAAPAnrwpRZGSkvvnmm4uWHzx4UO3atbviUAAAAP7kVSEaPXq0pk2bpkOHDrmXHTx4UI8//rhuv/12n4UDAADwB68K0eLFi9WuXTv17NlTiYmJSkxMVK9evdSxY0c9++yzvs4IAADQorz6LrPIyEht3bpVeXl52rlzp8LCwtSvXz+lpqb6Oh8AAECL86oQSZLNZlN6errS09MvOyYpKUlr165VfHy8t08DAADQ4rw6ZNZchw8fVn19fUs+BQAAwBVr0UIEAADQGnh9yAwtpJE9aoAbvw8A/IRCFGDCd660OgIAAMbhkBkAADAee4gCzJn+46XgNlbHAAJDYz17TQH4RYsWoldffVUxMTEt+RRXn+A2FCIAAPzMq0L0/PPPX3K5zWaTw+FQ9+7dlZqaqgkTJlxROAAAAH/wqhA999xzOn78uP73f/9X11xzjSTp1KlTatu2rdq3b6/Kykpdd9112rhxIxdlBAAAAc+r
k6qffvppDR48WAcOHNCJEyd04sQJ/fWvf1VycrKWLVum0tJSOZ1OTZ8+3dd5AQAAfM6rPURz587Vn/70J3Xr1s29rHv37nr22Wc1btw4ffPNN1q8eLHGjRvns6AAAAAtxas9RMeOHVNDQ8NFyxsaGlReXi5JiouL05kzZ64sHQAAgB94VYiGDx+uf/7nf9aXX37pXvbll19qypQpGjFihCRp9+7dSkxM9E1KAACAFuRVIXrjjTcUFRWlgQMHym63y263a9CgQYqKitIbb7whSWrfvr2WLFni07AAAAAtwatziJxOp/Ly8vT111/rr3/9qySpR48e6tGjh3vM8OHDfZMQAACghXlViDZv3qyhQ4eqZ8+e6tmzp68zAQAA+JVXh8xGjBihxMRE/cu//Iu++uorX2cCAADwK68KUVlZmR5//HEVFBSob9++GjBggJ555hkdPXrU1/kAAABanFeFqFOnTsrJydGWLVt06NAh3XXXXXr77bfVtWtX96fMAAAAWguvCtH3JSYmavbs2fr3f/93JSUlqaCgwBe5AAAA/OaKCtGWLVv0i1/8QrGxsZowYYL69u2rNWvW+CobAACAX3j1KbM5c+Zo5cqV+vbbb5Wenq5ly5Zp9OjRatu2ra/zAQAAtDivClFhYaGeeOIJ/exnP1OnTp18nQkAAMCvvCpEW7ZskSR99dVX+uKLL3T+/HmP9bfffvuVJwMAAPATrwpRSUmJxo4dq127dslms8nlckmSbDabJKmxsdF3CQEAAFqYVydVP/roo+ratasqKyvVtm1b7d27V4WFhRo0aJA2bdrU7O0UFhbqtttuU1xcnGw2mz744AOP9S6XS/PmzVNsbKzCwsKUlpamAwcOeIw5efKkJk6cqIiICHXo0EGTJk1STU2Nx5hdu3bp5ptvlsPhUHx8vBYvXuzNtAEAwFXKq0JUVFSkhQsXqlOnTgoKClJQUJCGDh2qRYsW6dFHH232ds6ePav+/fvrpZdeuuT6xYsX6/nnn9crr7yi7du3q127dsrIyFBtba17zMSJE7V3717l5eVp9erVKiws1OTJk93rq6urlZ6eroSEBO3YsUPPPPOMFixYoNdee82bqQMAgKuQV4fMGhsbFR4eLum7izSWlZWpR48eSkhI0P79+5u9nczMTGVmZl5yncvl0tKlSzV37lyNHj1akvT73/9eMTEx+uCDDzR+/Hjt27dP69at0+eff65BgwZJkl544QXdcsstevbZZxUXF6d33nlH58+f15tvvqnQ0FD16dNHxcXF+u1vf+tRnH6orq5OdXV17vvV1dXNnhcAAGhdvNpD1LdvX+3cuVOSlJycrMWLF2vLli1auHChrrvuOp8EKykpUXl5udLS0tzLIiMjlZycrKKiIknf7anq0KGDuwxJUlpamoKCgrR9+3b3mNTUVIWGhrrHZGRkaP/+/Tp16tRln3/RokWKjIx03+Lj430yLwAAEHi8KkRz585VU1OTJGnhwoUqKSnRzTffrLVr1+r555/3SbDy8nJJUkxMjMfymJgY97ry8nJFR0d7rA8JCVFUVJTHmEtt4/vPcSlz5sxRVVWV+3bkyJErmxAAAAhYXh0yy8jIcP/cvXt3ff311zp58qSuueYa9yfNWju73S673W51DAAA4AdX/F1mF0RFRfm0DDmdTklSRUWFx/KKigr3OqfTqcrKSo/1DQ0NOnnypMeYS23j+88BAADM5rNC5GuJiYlyOp3Kz893L6uurtb27duVkpIiSUpJSdHp06e1Y8cO95gNGzaoqalJycnJ7jGFhYWqr693j8nLy1OPHj10zTXX+Gk2AAAgkFlaiGpqalRcXKzi4mJJ351IXVxcrNLSUtlsNk2bNk1PPfWUPvzwQ+3evVv33Xef4uLiNGbMGElSr169NGrUKD388MP67LPPtGXLFuXk5Gj8+PGKi4uTJE2YMEGhoaGaNGmS9u7dq3fffVfLli3TjBkzLJo1AAAINF6dQ+QrX3zx
hYYPH+6+f6Gk3H///Vq+fLlmzpyps2fPavLkyTp9+rSGDh2qdevWyeFwuB/zzjvvKCcnRyNHjlRQUJDGjRvncWJ3ZGSk1q9fr+zsbA0cOFCdOnXSvHnzfvQj9wAAwCyWFqJhw4a5v/bjUmw2mxYuXKiFCxdedkxUVJRWrFjxo8/Tr18/ffrpp17nBABvuVwuj4vJwlrffy14XQKHw+Gw/ENZlhYiALja1dbWXvYCtLDW2LFjrY6Av8nNzVVYWJilGQL2pGoAAAB/YQ8RAPjJi0NPyh58+dME0PJcLun8d9cVVmiQdJVcOq9Vqmu0KWdzlNUx3ChEAOAn9mCX7MFWp4Djp4fALwLrzQGHzAAAgPEoRAAAwHgUIgAAYDwKEQAAMB6FCAAAGI9CBAAAjEchAgAAxqMQAQAA41GIAACA8ShEAADAeBQiAABgPAoRAAAwHoUIAAAYj0IEAACMRyECAADGoxABAADjUYgAAIDxKEQAAMB4FCIAAGA8ChEAADAehQgAABiPQgQAAIxHIQIAAMajEAEAAONRiAAAgPEoRAAAwHgUIgAAYDwKEQAAMB6FCAAAGC/E6gDwZGtqkMvqEKZzuaSmhu9+DgqRbDZr8xjMduF1AIAWRiEKMO2L/9PqCAAAGIdDZgAAwHjsIQoADodDubm5VsfA39TW1mrs2LGSpFWrVsnhcFicCJJ4HQC0KApRALDZbAoLC7M6Bi7B4XDw2gCAAThkBgAAjEchAgAAxqMQAQAA41GIAACA8ShEAADAeBQiAABgPAoRAAAwHoUIAAAYj0IEAACMRyECAADGoxABAADjUYgAAIDxKEQAAMB4FCIAAGA8ChEAADAehQgAABiPQgQAAIxHIQIAAMajEAEAAOOFWB0AAExR12h1AiBwBNrvA4UIAPwkZ3NHqyMAuAwOmQEAAOOxhwgA/OTFoSdkD7Y6BRAY6hoDa68phQgA/MQeLAoREKAC/pDZggULZLPZPG49e/Z0r6+trVV2drY6duyo9u3ba9y4caqoqPDYRmlpqbKystS2bVtFR0friSeeUENDg7+nAgAAAlSr2EPUp08fffLJJ+77ISH/P/b06dO1Zs0a/fGPf1RkZKRycnJ0xx13aMuWLZKkxsZGZWVlyel0auvWrTp27Jjuu+8+tWnTRk8//bTf5wIAAAJPqyhEISEhcjqdFy2vqqrSG2+8oRUrVmjEiBGSpLfeeku9evXStm3bNGTIEK1fv15fffWVPvnkE8XExGjAgAH69a9/rVmzZmnBggUKDQ295HPW1dWprq7Ofb+6urplJgcAACwX8IfMJOnAgQOKi4vTddddp4kTJ6q0tFSStGPHDtXX1ystLc09tmfPnrr22mtVVFQkSSoqKlJSUpJiYmLcYzIyMlRdXa29e/de9jkXLVqkyMhI9y0+Pr6FZgcAAKwW8IUoOTlZy5cv17p16/Tyyy+rpKREN998s86cOaPy8nKFhoaqQ4cOHo+JiYlReXm5JKm8vNyjDF1Yf2Hd5cyZM0dVVVXu25EjR3w7MQAAEDAC/pBZZmam++d+/fopOTlZCQkJeu+99xQWFtZiz2u322W321ts+wAAIHAE/B6iH+rQoYOuv/56HTx4UE6nU+fPn9fp06c9xlRUVLjPOXI6nRd96uzC/UudlwQAAMzT6gpRTU2NDh06pNjYWA0cOFBt2rRRfn6+e/3+/ftVWlqqlJQUSVJKSop2796tyspK95i8vDxFRESod+/efs8PAAACT8AfMvvlL3+p2267TQkJCSorK9P8+fMVHBysu+++W5GRkZo0aZJmzJihqKgoRUREaOrUqUpJSdGQIUMkSenp6erdu7fuvfdeLV68WOXl5Zo7d66ys7M5JAYAACS1gkJ09OhR3X333Tpx4oQ6d+6soUOHatu2bercubMk6bnnnlNQUJDGjRunuro6ZWRk6He/+5378cHBwVq9erWmTJmilJQUtWvXTvfff78WLlxo1ZQAAECACfhCtHLl
yh9d73A49NJLL+mll1667JiEhAStXbvW19EAAMBVotWdQwQAAOBrFCIAAGA8ChEAADAehQgAABiPQgQAAIxHIQIAAMajEAEAAONRiAAAgPEoRAAAwHgUIgAAYDwKEQAAMF7Af5cZAFwt6hptklxWxzCayyWdb/ru59AgyWazNo/Jvvt9CBwUIgDwk5zNUVZHAHAZHDIDAADGYw8RALQgh8Oh3Nxcq2Pgb2prazV27FhJ0qpVq+RwOCxOBEkB8TpQiACgBdlsNoWFhVkdA5fgcDh4beDGITMAAGA8ChEAADAehQgAABiPQgQAAIxHIQIAAMajEAEAAONRiAAAgPEoRAAAwHgUIgAAYDwKEQAAMB6FCAAAGI9CBAAAjEchAgAAxqMQAQAA41GIAACA8ShEAADAeBQiAABgPAoRAAAwXojVAXD1cLlcqq2ttTrGFfv+HK6G+TgcDtlsNqtjAEBAoxDBZ2pra5WZmWl1DJ8aO3as1RGuWG5ursLCwqyOgVaONzyBiTc8vkMhAgD8JN7wBCbe8PgOhQg+43A4lJuba3WMK+ZyuVRXVydJstvtrf7dl8PhsDoCAAQ8ChF8xmazXTXvVNq2bWt1BCCg8IYnMPGGx3coRACAn8QbHlzt+Ng9AAAwHoUIAAAYj0IEAACMRyECAADGoxABAADjUYgAAIDxKEQAAMB4FCIAAGA8ChEAADAehQgAABiPQgQAAIxHIQIAAMajEAEAAOPxbffN5HK5JEnV1dUWJwEAAM114e/2hb/jl0MhaqYzZ85IkuLj4y1OAgAA/q/OnDmjyMjIy663uX6qMkGS1NTUpLKyMoWHh8tms1kdBy2surpa8fHxOnLkiCIiIqyOA8CH+P02i8vl0pkzZxQXF6egoMufKcQeomYKCgpSly5drI4BP4uIiOB/mMBVit9vc/zYnqELOKkaAAAYj0IEAACMRyECLsFut2v+/Pmy2+1WRwHgY/x+41I4qRoAABiPPUQAAMB4FCIAAGA8ChEAADAehQgAABiPQgT8wEsvvaSuXbvK4XAoOTlZn332mdWRAPhAYWGhbrvtNsXFxclms+mDDz6wOhICCIUI+J53331XM2bM0Pz58/WXv/xF/fv3V0ZGhiorK62OBuAKnT17Vv3799dLL71kdRQEID52D3xPcnKyBg8erBdffFHSd99hFx8fr6lTp2r27NkWpwPgKzabTatWrdKYMWOsjoIAwR4i4G/Onz+vHTt2KC0tzb0sKChIaWlpKioqsjAZAKClUYiAv/mf//kfNTY2KiYmxmN5TEyMysvLLUoFAPAHChEAADAehQj4m06dOik4OFgVFRUeyysqKuR0Oi1KBQDwBwoR8DehoaEaOHCg8vPz3cuampqUn5+vlJQUC5MBAFpaiNUBgEAyY8YM3X///Ro0aJD+/u//XkuXLtXZs2f1wAMPWB0NwBWqqanRwYMH3fdLSkpUXFysqKgoXXvttRYmQyDgY/fAD7z44ot65plnVF5ergEDBuj5559XcnKy1bEAXKFNmzZp+PDhFy2///77tXz5cv8HQkChEAEAAONxDhEAADAehQgAABiPQgQAAIxHIQIAAMajEAEAAONRiAAAgPEoRAAAwHgUIgAAYDwKEQAAMB6FCAAAGI9CBADNdP78easjAGghFCIArd5//dd/KSkpSWFhYerYsaPS0tJ09uxZSdKbb76pPn36yG63KzY2Vjk5Oe7HlZaWavTo0Wrfvr0iIiL0s5/9TBUVFe71CxYs0IABA/T6668rMTFRDodDknT69Gk99NBD6ty5syIiIjRixAjt3LnTv5MG4FMUIgCt2rFjx3T33XfrwQcf1L59+7Rp0ybdcccdcrlcevnll5Wdna3Jkydr9+7d+vDDD9W9e3dJUlNTk0aPHq2TJ0+qoKBAeXl5+uabb/RP//RPHts/ePCg/vSnP+n9999XcXGxJOmuu+5SZWWlcnNztWPHDt14440aOXKkTp486e/pA/ARvu0e
QKv2l7/8RQMHDtThw4eVkJDgse7v/u7v9MADD+ipp5666HF5eXnKzMxUSUmJ4uPjJUlfffWV+vTpo88++0yDBw/WggUL9PTTT+vbb79V586dJUmbN29WVlaWKisrZbfb3dvr3r27Zs6cqcmTJ7fgbAG0lBCrAwDAlejfv79GjhyppKQkZWRkKD09XXfeeafq6+tVVlamkSNHXvJx+/btU3x8vLsMSVLv3r3VoUMH7du3T4MHD5YkJSQkuMuQJO3cuVM1NTXq2LGjx/bOnTunQ4cOtcAMAfgDhQhAqxYcHKy8vDxt3bpV69ev1wsvvKBf/epXys/P98n227Vr53G/pqZGsbGx2rRp00VjO3To4JPnBOB/FCIArZ7NZtNNN92km266SfPmzVNCQoLy8vLUtWtX5efna/jw4Rc9plevXjpy5IiOHDniccjs9OnT6t2792Wf68Ybb1R5eblCQkLUtWvXlpoSAD+jEAFo1bZv3678/Hylp6crOjpa27dv1/Hjx9WrVy8tWLBAjzzyiKKjo5WZmakzZ85oy5Ytmjp1qtLS0pSUlKSJEydq6dKlamho0C9+8Qv9wz/8gwYNGnTZ50tLS1NKSorGjBmjxYsX6/rrr1dZWZnWrFmjsWPH/uhjAQQuChGAVi0iIkKFhYVaunSpqqurlZCQoCVLligzM1OSVFtbq+eee06//OUv1alTJ915552Svtur9Oc//1lTp05VamqqgoKCNGrUKL3wwgs/+nw2m01r167Vr371Kz3wwAM6fvy4nE6nUlNTFRMT0+LzBdAy+JQZAAAwHtchAgAAxqMQAQAA41GIAACA8ShEAADAeBQiAABgPAoRAAAwHoUIAAAYj0IEAACMRyECAADGoxABAADjUYgAAIDx/h8eQ45Fmuv5YwAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 640x480 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "sns.boxplot(x='score', y='avg_chunk_len', data=pred_df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "id": "7c23c763-f6cf-43b1-9659-8617ce1ddb8e",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T13:43:23.833890Z",
     "iopub.status.busy": "2024-12-04T13:43:23.833759Z",
     "iopub.status.idle": "2024-12-04T13:43:23.899395Z",
     "shell.execute_reply": "2024-12-04T13:43:23.898949Z",
     "shell.execute_reply.started": "2024-12-04T13:43:23.833877Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<Axes: xlabel='score', ylabel='max_chunk_len'>"
      ]
     },
     "execution_count": 39,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAkQAAAGwCAYAAABIC3rIAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/TGe4hAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAr8klEQVR4nO3de3RU9aH+/2cSyExAJjFAMkmNkcqqAgaQizFFKJeYECkVxAuKQAHlcE6CC6lCqYpcPGJpFauiLBYo+jugUI9QuQQJUQQhRg1ELiKtSBdUSGKNyRAOuc78/lDmy8hFnEyyJ/m8X2vttTJ7f7Ln2U1DHvf+7Nk2r9frFQAAgMHCrA4AAABgNQoRAAAwHoUIAAAYj0IEAACMRyECAADGoxABAADjUYgAAIDxWlkdoLnweDw6fvy42rVrJ5vNZnUcAABwCbxer06ePKmEhASFhV34PBCF6BIdP35ciYmJVscAAAABOHbsmK644ooLbqcQXaJ27dpJ+u5/UKfTaXEaAABwKdxutxITE31/xy+EQnSJzlwmczqdFCIAAJqZH5vuwqRqAABgPAoRAAAwHoUIAAAYj0IEAACMRyECAADGoxABAADjUYgAAIDxKEQAAMB4FCIAAGA8ChEAwCjLly/X4MGDtXz5cqujIIRQiAAAxigvL9fKlSvl8Xi0cuVKlZeXWx0JIYJCBAAwxmOPPSaPxyNJ8ng8mj17tsWJECp4uGsI8Hq9qqqqsjpGg3m9XlVXV1sdAz9gt9t/9KGGzYHD4WgRxwHrfPLJJ9q3b5/fur179+qTTz5Rnz59LEqFUGHzer1eq0M0B263W1FRUaqoqAj60+5Pnz6tzMzMoO4TaGlycnIUGRlpdQw0Ux6PRyNGjJDb7T5nm9Pp1Lp16xQWxkWTluhS/37z0wcAtHgFBQXnLUPSd38wCwoKmjgRQg2XzEJMZc+75Q1rpj8Wr1fy1FmdAj8U1kpqppeabJ46XVb0utUx0AKkpKTI6XSetxRFRUUpJSXFglQIJc30L2/L5Q1rJYW3tjpGA0RYHQAtCNfzESxhYWGaPXu2HnrooXO2Pf7441wuA5fMAABm6NOnj5KTk/3Wde/eXb169bIoEUIJhQgAYIz58+f7zgaFhYVp3rx5FidCqKAQAQCMER0drTFjxigsLExjxoxRdHS01ZEQIphDBAAwyqRJkzRp0iSrYyDEcIYIAAAYj0IEAACMRyECAADGoxABAADjUYgAAIDxKEQAAMB4FCIAAGA8ChEAADAehQgAABiPQgQAAIxHIQIAAMajEAEAAONRiAAAgPEoRAAAwHgUIgAAYDwKEQAAMB6FCAAAGI9CBAAAjEchAgAAxqMQAQAA41GIAACA8ShEAADAeBQiAABgPAoRAAAwHoUIAAAYj0IEAACMRyECAADGoxABAADjUYgAAIDxKEQAAMB4FCIAAGA8ChEAADAehQgAABjP0kK0YMEC9e3bV+3atVNsbKxGjBihQ4cO+Y0ZOHCgbDab3zJlyhS/MUePHtWwYcPUpk0bxcbG6uGHH1ZdXZ3fmG3btqlXr16y2+3q3LmzVqxY0diHBwAAmglLC9H777+vrKwsffjhh8rNzVVtba3S09N16tQpv3H333+/Tpw44VsWLlzo21ZfX69hw4appqZGu3bt0quvvqoVK1Zo9uzZvjFHjhzRsGHDNGjQIBUVFWnatGm677779M477zTZsQIAgNDVyso337x5s9/rFStWKDY2VoWFhRowYIBvfZs2beRyuc67jy1btuizzz7T1q1bFRcXp549e2r+/PmaOXOm5syZo4iICC1ZskSdOnXS008/LUnq0qWLPvjgAy1atEgZGRmNd4AAAKBZCKk5RBUVFZKkmJgYv/UrV65Uhw4ddN1112nWrFn6v//7P9+2/Px8JScnKy4uzrcuIyNDbrdbBw4c8I1JS0vz22dGRoby8/MvmKW6ulput9tvAQAALZOlZ4jO
5vF4NG3aNPXr10/XXXedb/0999yjpKQkJSQkaO/evZo5c6YOHTqkt956S5JUXFzsV4Yk+V4XFxdfdIzb7dbp06cVGRl5Tp4FCxZo7ty5QT1GAAAQmkKmEGVlZWn//v364IMP/NZPnjzZ93VycrLi4+M1ZMgQHT58WFdffXWj5Zk1a5amT5/ue+12u5WYmNho7wcAAKwTEpfMsrOztWHDBr333nu64oorLjo2JSVFkvTFF19Iklwul0pKSvzGnHl9Zt7RhcY4nc7znh2SJLvdLqfT6bcAAICWydJC5PV6lZ2drbVr1+rdd99Vp06dfvR7ioqKJEnx8fGSpNTUVO3bt0+lpaW+Mbm5uXI6neratatvTF5ent9+cnNzlZqaGqQjAQAAzZmlhSgrK0v/8z//o1WrVqldu3YqLi5WcXGxTp8+LUk6fPiw5s+fr8LCQv3zn//U22+/rXHjxmnAgAHq3r27JCk9PV1du3bV2LFj9emnn+qdd97Ro48+qqysLNntdknSlClT9OWXX2rGjBn6/PPP9eKLL2rNmjV68MEHLTt2AAAQOiwtRC+99JIqKio0cOBAxcfH+5bVq1dLkiIiIrR161alp6fr2muv1e9+9zuNGjVK69ev9+0jPDxcGzZsUHh4uFJTU3Xvvfdq3Lhxmjdvnm9Mp06dtHHjRuXm5qpHjx56+umntWzZMm65BwAAkiyeVO31ei+6PTExUe+///6P7icpKUmbNm266JiBAwdqz549PykfAAAwQ0hMqgYAALAShQgAABiPQgQAAIxHIQIAAMajEAEAAONRiAAAgPEoRAAAwHgUIgAAYDwKEQAAMB6FCAAAGI9CBAAAjEchAgAAxqMQAQAA41GIAACA8ShEAADAeBQiAABgPAoRAAAwHoUIAAAYj0IEAACMRyECAADGoxABAADjUYgAAIDxKEQAAMB4FCIAAGA8ChEAADAehQgAABiPQgQAAIxHIQIAAMajEAEAAONRiAAAgPEoRAAAwHgUIgAAYDwKEQAAMB6FCAAAGI9CBAAAjEchAgAAxqMQAQAA41GIAACA8ShEAADAeBQiAABgPAoRAAAwHoUIAAAYj0IEAACMRyECAADGoxABAADjUYgAAIDxKEQAAMB4FCIAAGA8ChEAADAehQgAABiPQgQAAIxHIQIAAMZrZXUA/EB9rdUJgNDB70PI8Hq9qqqqsjpGg3m9XlVXV0uS7Ha7bDabxYkaxuFwNPtjCBUUohDT7tM3rI4AAOeoqqpSZmam1THwAzk5OYqMjLQ6Rotg6SWzBQsWqG/fvmrXrp1iY2M1YsQIHTp0yG9MVVWVsrKy1L59e1122WUaNWqUSkpK/MYcPXpUw4YNU5s2bRQbG6uHH35YdXV1fmO2bdumXr16yW63q3PnzlqxYkVjHx4AAGgmLD1D9P777ysrK0t9+/ZVXV2d/vCHPyg9PV2fffaZ2rZtK0l68MEHtXHjRv31r39VVFSUsrOzddttt2nnzp2SpPr6eg0bNkwul0u7du3SiRMnNG7cOLVu3VpPPvmkJOnIkSMaNmyYpkyZopUrVyovL0/33Xef4uPjlZGRYdnxn8/JHqOl8NZWxwBCQ30tZ01DhMPhUE5OjtUxGqyqqkojR46UJK1du1YOh8PiRA3T3POHEpvX6/VaHeKMr7/+WrGxsXr//fc1YMAAVVRUqGPHjlq1apVuv/12SdLnn3+uLl26KD8/XzfeeKNycnL061//WsePH1dcXJwkacmSJZo5c6a+/vprRUREaObMmdq4caP279/ve6/Ro0ervLxcmzdvvqRsbrdbUVFRqqiokNPpDOpxnz592ncq+mSvsRQi4Iz6WrXb/f9J4tIAguPsf2/5/5QZLvXvd0jdZVZRUSFJiomJkSQVFhaqtrZWaWlpvjHXXnutrrzySuXn50uS8vPzlZyc7CtDkpSRkSG3260DBw74xpy9jzNjzuzjfKqrq+V2u/0WAADQMoVMIfJ4PJo2bZr69eun
6667TpJUXFysiIgIRUdH+42Ni4tTcXGxb8zZZejM9jPbLjbG7Xbr9OnT582zYMECRUVF+ZbExMQGHyMAAAhNIVOIsrKytH//fr3xRmjMF5g1a5YqKip8y7Fjx6yOBAAAGklI3HafnZ2tDRs2aPv27briiit8610ul2pqalReXu53lqikpEQul8s35qOPPvLb35m70M4e88M700pKSuR0Oi94/dhut8tutzf42AAAQOiz9AyR1+tVdna21q5dq3fffVedOnXy2967d2+1bt1aeXl5vnWHDh3S0aNHlZqaKklKTU3Vvn37VFpa6huTm5srp9Oprl27+sacvY8zY87sAwAAmM3SM0RZWVlatWqV/va3v6ldu3a+OT9RUVGKjIxUVFSUJk2apOnTpysmJkZOp1NTp05VamqqbrzxRklSenq6unbtqrFjx2rhwoUqLi7Wo48+qqysLN8ZnilTpuiFF17QjBkzNHHiRL377rtas2aNNm7caNmxAwCA0GHpGaKXXnpJFRUVGjhwoOLj433L6tWrfWMWLVqkX//61xo1apQGDBggl8ult956y7c9PDxcGzZsUHh4uFJTU3Xvvfdq3Lhxmjdvnm9Mp06dtHHjRuXm5qpHjx56+umntWzZspD7DCIAAGANS88QXcpHIDkcDi1evFiLFy++4JikpCRt2rTpovsZOHCg9uzZ85MzAgCAli9k7jIDAACwCoUIAAAYj0IEAACMF/AcovLycn300UcqLS2Vx+Px2zZu3LgGBwMAAGgqARWi9evXa8yYMaqsrJTT6ZTNZvNts9lsFCIAANCsBHTJ7He/+50mTpyoyspKlZeX69tvv/UtZWVlwc4IAADQqAIqRF999ZUeeOABtWnTJth5AAAAmlxAhSgjI0OffPJJsLMAAABYIqA5RMOGDdPDDz+szz77TMnJyWrdurXf9t/85jdBCQcAANAUAipE999/vyT5PR7jDJvNpvr6+oalAgAAaEIBFaIf3mYPAADQnDX4gxmrqqqCkQMAAMAyARWi+vp6zZ8/Xz/72c902WWX6csvv5QkPfbYY1q+fHlQAwIAADS2gArRf//3f2vFihVauHChIiIifOuvu+46LVu2LGjhAAAAmkJAhei1117T0qVLNWbMGIWHh/vW9+jRQ59//nnQwgEAADSFgD+YsXPnzues93g8qq2tbXAoAACAphRQIeratat27Nhxzvo333xT119/fYNDAQAANKWAbrufPXu2xo8fr6+++koej0dvvfWWDh06pNdee00bNmwIdkYAAIBGFdAZoltvvVXr16/X1q1b1bZtW82ePVsHDx7U+vXrdfPNNwc7IwAAQKMK6AyRJPXv31+5ubnBzAIAAGCJBn8wIwAAQHN3yWeILr/8ctlstksaW1ZWFnAgAACApnbJhejZZ59txBgAAADWueRCNH78+J+886eeekpTpkxRdHT0T/5eAACAptKoc4iefPJJLp8BAICQ16iFyOv1NubuAQAAgoK7zAAAgPEoRAAAwHgUIgAAYDwKEQAAMF7QC9Hp06d9X/fv31+RkZHBfgsAAICgCqgQPfDAA+ddf+rUKd1yyy2+15s2bVJ8fHxgyQAAAJpIQIVo48aNevzxx/3WnTp1SkOHDlVdXV1QggEAADSVgJ52v2XLFvXv31+XX365pk2bppMnTyojI0OtWrVSTk5OsDMCAAA0qoAK0dVXX63Nmzdr0KBBCgsL0+uvvy673a6NGzeqbdu2wc4IAADQqAIqRJLUvXt3bdiwQTfffLNSUlK0YcMGJlADAIBm6ZIL0fXXXy+bzXbOervdruPHj6tfv36+dbt37w5OOgAAgCZwyYVoxIgRjRgDAADAOpdciH54VxkAAEBLEfAcIkmqqalRaWmpPB6P3/orr7yyQaEAAACaUkCF6O9//7smTZqkXbt2+a33er2y2Wyqr68PSjgAAICmEFAhmjBhglq1aqUNGzYoPj7+vJOtAQAAmouAClFRUZEKCwt1
7bXXBjsPAABAkwvo0R1du3bVv//972BnAQAAsERAheiPf/yjZsyYoW3btumbb76R2+32WwAAAJqTgC6ZpaWlSZKGDBnit55J1QAAoDkKqBC99957wc4BAABgmYAK0a9+9atg5wAAALBMQIVo+/btF90+YMCAgMIAAABYIaBCNHDgwHPWnf1ZRMwhAgAAzUlAd5l9++23fktpaak2b96svn37asuWLcHOCAAA0KgCOkMUFRV1zrqbb75ZERERmj59ugoLCxscDAAAoKkEdIboQuLi4nTo0KFg7hIAAKDRBXSGaO/evX6vvV6vTpw4oaeeeko9e/YMRi4AAIAmE9AZop49e+r6669Xz549fV/fcsstqqmp0bJlyy55P9u3b9fw4cOVkJAgm82mdevW+W3/7W9/K5vN5rcMHTrUb0xZWZnGjBkjp9Op6OhoTZo0SZWVlX5j9u7dq/79+8vhcCgxMVELFy4M5LABAEALFdAZoiNHjvi9DgsLU8eOHeVwOH7Sfk6dOqUePXpo4sSJuu222847ZujQoXrllVd8r+12u9/2MWPG6MSJE8rNzVVtba0mTJigyZMna9WqVZIkt9ut9PR0paWlacmSJdq3b58mTpyo6OhoTZ48+SflBYCfyuv1qqqqyuoY+N7ZPwt+LqHD4XD43a1uhYAKUVJSUlDePDMzU5mZmRcdY7fb5XK5zrvt4MGD2rx5sz7++GP16dNHkvT888/rlltu0Z///GclJCRo5cqVqqmp0csvv6yIiAh169ZNRUVFeuaZZyhEABpdVVXVj/47B2uMHDnS6gj4Xk5OjiIjIy3NEFAhkqS8vDzl5eWptLRUHo/Hb9vLL7/c4GBnbNu2TbGxsbr88ss1ePBgPfHEE2rfvr0kKT8/X9HR0b4yJH33nLWwsDAVFBRo5MiRys/P14ABAxQREeEbk5GRoT/+8Y/69ttvdfnll5/3faurq1VdXe17zUNrAQBouQIqRHPnztW8efPUp08fxcfHN9pprqFDh+q2225Tp06ddPjwYf3hD39QZmam8vPzFR4eruLiYsXGxvp9T6tWrRQTE6Pi4mJJUnFxsTp16uQ3Ji4uzrftQoVowYIFmjt3biMcFQBTvXBTmezhXqtjGM3rlWq+/2/4iDDJ4qs0Rquutyn7gxirY/gEVIiWLFmiFStWaOzYscHO42f06NG+r5OTk9W9e3ddffXV2rZtm4YMGdKo7z1r1ixNnz7d99rtdisxMbFR3xNAy2YP98oebnUK/LTZrmg8ofUfBwHdZVZTU6Nf/vKXwc7yo37+85+rQ4cO+uKLLyRJLpdLpaWlfmPq6upUVlbmm3fkcrlUUlLiN+bM6wvNTZK+m7vkdDr9FgAA0DIFVIjuu+8+311cTelf//qXvvnmG8XHx0uSUlNTVV5e7vfJ2O+++648Ho9SUlJ8Y7Zv367a2lrfmNzcXF1zzTUXvFwGAADMcsmXzM6+fOTxeLR06VJt3bpV3bt3V+vWrf3GPvPMM5e0z8rKSt/ZHum72/mLiooUExOjmJgYzZ07V6NGjZLL5dLhw4c1Y8YMde7cWRkZGZKkLl26aOjQobr//vu1ZMkS1dbWKjs7W6NHj1ZCQoIk6Z577tHcuXM1adIkzZw5U/v379df/vIXLVq06FIPHQAAtHCXXIj27Nnj9/rMJ1Lv37/fb/1PmWD9ySefaNCgQb7XZ0rX+PHj9dJLL2nv3r169dVXVV5eroSEBKWnp2v+/Pl+n0W0cuVKZWdna8iQIQoLC9OoUaP03HPP+bZHRUVpy5YtysrKUu/evdWhQwfNnj2bW+4BAIDPJRei9957L+hvPnDgQHm9F55U9c477/zoPmJiYn708l337t21Y8eOn5wPAACYIaA5RBUVFSorKztnfVlZGZ/XAwAAmp2ACtHo0aP1xhtvnLN+zZo1frfKAwAANAcBFaKCggK/uT9nDBw4UAUFBQ0OBQAA0JQCKkTV1dWqq6s7Z31tba1Onz7d4FAA
AABNKaBCdMMNN2jp0qXnrF+yZIl69+7d4FAAAABNKaBHdzzxxBNKS0vTp59+6nuERl5enj7++GNt2bIlqAEBAAAaW0BniPr166f8/HwlJiZqzZo1Wr9+vTp37qy9e/eqf//+wc4IAADQqAI6QyR998GMK1euvOiYp556SlOmTFF0dHSgbwMAANDoAjpDdKmefPLJ835eEQAAQChp1EJ0sU+hBgAACBWNWogAAACaAwoRAAAwHoUIAAAYj0IEAACMF/Bt95eif//+ioyMbMy3aHFsnjoxFd1iXq/k+f7RNGGtJJvN2jwGs3nOfUQQADSGgArRihUr9Nvf/vac9XV1dXrssce0YMECSdKmTZsaFM5ElxW9bnUEAACME9AlswceeEB33HGHvv32W9+6Q4cOKSUlRa+/zh90AADQvAR0hmjPnj269957lZycrFdeeUV///vfNWPGDI0YMUIvvvhisDO2eA6HQzk5OVbHwPeqqqo0cuRISdLatWvlcDgsTgRJ/BwANKqACtHVV1+tnTt3atq0aRo6dKjCw8P16quv6u677w52PiPYbDbmWoUoh8PBzwYADBDwXWYbN27UG2+8odTUVEVHR2v58uU6fvx4MLMBAAA0iYAK0X/8x3/ojjvu0MyZM7Vjxw7t3btXERERSk5O1po1a4KdEQAAoFEFdMls586dKigoUI8ePSRJLpdLmzZt0uLFizVx4kTdeeedQQ0JAADQmAIqRIWFhbLb7eesz8rKUlpaWoNDAQAANKWALpmdrwydcc011wQcBgAAwAoBf1L1m2++qTVr1ujo0aOqqanx27Z79+4GBwMAAGgqAZ0heu655zRhwgTFxcVpz549uuGGG9S+fXt9+eWXyszMDHZGAACARhVQIXrxxRe1dOlSPf/884qIiNCMGTOUm5urBx54QBUVFcHOCAAA0KgCKkRHjx7VL3/5S0lSZGSkTp48KUkaO3Ysj+4AAADNTkCFyOVyqaysTJJ05ZVX6sMPP5QkHTlyRF4vz2oHAADNS0CFaPDgwXr77bclSRMmTNCDDz6om2++WXfddZfvGVAAAADNRUB3mS1dulQej0fSd5891KFDB+3cuVO/+c1vNGXKlKAGBAAAaGwBFaKwsDDV1NRo9+7dKi0tVWRkpO8DGTdv3qzhw4cHNSQAAEBjCqgQbd68WWPHjtU333xzzjabzab6+voGBwMAAGgqAc0hmjp1qu68806dOHFCHo/Hb6EMAQCA5iagM0QlJSWaPn264uLigp0HAFqsav57EfAJtd+HgArR7bffrm3btunqq68Odh4AaLGyP2hvdQQAFxBQIXrhhRd0xx13aMeOHUpOTlbr1q39tj/wwANBCQcAANAUAipEr7/+urZs2SKHw6Ft27bJZrP5ttlsNgoRAJzHCzd9I3u41SmA0FBdH1pnTQMqRI888ojmzp2r3//+9woLC2heNgAYxx4uChEQogJqMzU1NbrrrrsoQwAAoEUIqNGMHz9eq1evDnYWAAAASwR0yay+vl4LFy7UO++8o+7du58zqfqZZ54JSjgAAICmEFAh2rdvn66//npJ0v79+/22nT3BGgAAoDkIqBC99957wc4BAABgGWZFAwAA41GIAACA8ShEAADAeBQiAABgPAoRAAAwHoUIAAAYj0IEAACMRyECAADGoxABAADjUYgAAIDxLC1E27dv1/Dhw5WQkCCbzaZ169b5bfd6vZo9e7bi4+MVGRmptLQ0/eMf//AbU1ZWpjFjxsjpdCo6OlqTJk1SZWWl35i9e/eqf//+cjgcSkxM1MKFCxv70AAAQDNiaSE6deqUevToocWLF593+8KFC/Xcc89pyZIlKigoUNu2bZWRkaGqqirfmDFjxujAgQPKzc3Vhg0btH37dk2ePNm33e12Kz09XUlJSSosLNSf/vQnzZkzR0uXLm304wMAAM1DQA93DZbMzExlZmaed5vX69Wzzz6rRx99VLfeeqsk6bXXXlNcXJzWrVun
0aNH6+DBg9q8ebM+/vhj9enTR5L0/PPP65ZbbtGf//xnJSQkaOXKlaqpqdHLL7+siIgIdevWTUVFRXrmmWf8ihMAADBXyM4hOnLkiIqLi5WWluZbFxUVpZSUFOXn50uS8vPzFR0d7StDkpSWlqawsDAVFBT4xgwYMEARERG+MRkZGTp06JC+/fbbC75/dXW13G633wIAAFqmkC1ExcXFkqS4uDi/9XFxcb5txcXFio2N9dveqlUrxcTE+I053z7Ofo/zWbBggaKionxLYmJiww4IAACErJAtRFabNWuWKioqfMuxY8esjgQAABpJyBYil8slSSopKfFbX1JS4tvmcrlUWlrqt72urk5lZWV+Y863j7Pf43zsdrucTqffAgAAWqaQLUSdOnWSy+VSXl6eb53b7VZBQYFSU1MlSampqSovL1dhYaFvzLvvviuPx6OUlBTfmO3bt6u2ttY3Jjc3V9dcc40uv/zyJjoaAAAQyiwtRJWVlSoqKlJRUZGk7yZSFxUV6ejRo7LZbJo2bZqeeOIJvf3229q3b5/GjRunhIQEjRgxQpLUpUsXDR06VPfff78++ugj7dy5U9nZ2Ro9erQSEhIkSffcc48iIiI0adIkHThwQKtXr9Zf/vIXTZ8+3aKjBgAAocbS2+4/+eQTDRo0yPf6TEkZP368VqxYoRkzZujUqVOaPHmyysvLddNNN2nz5s1yOBy+71m5cqWys7M1ZMgQhYWFadSoUXruued826OiorRlyxZlZWWpd+/e6tChg2bPns0t9wAAwMfSQjRw4EB5vd4LbrfZbJo3b57mzZt3wTExMTFatWrVRd+ne/fu2rFjR8A5AQBAyxayc4gAAACaCoUIAAAYj0IEAACMRyECAADGoxABAADjUYgAAIDxKEQAAMB4FCIAAGA8ChEAADAehQgAABiPQgQAAIxHIQIAAMajEAEAAONRiAAAgPEoRAAAwHgUIgAAYDwKEQAAMB6FCAAAGI9CBAAAjEchAgAAxqMQAQAA41GIAACA8ShEAADAeBQiAABgPAoRAAAwHoUIAAAYj0IEAACMRyECAADGoxABAADjUYgAAIDxKEQAAMB4FCIAAGA8ChEAADAehQgAABiPQgQAAIxHIQIAAMajEAEAAONRiAAAgPEoRAAAwHitrA4AAKaorrdJ8lodw2her1Tj+e7riDDJZrM2j8m++30IHRQiAGgi2R/EWB0BwAVwyQwAABiPM0QA0IgcDodycnKsjoHvVVVVaeTIkZKktWvXyuFwWJwIkkLi50AhAoBGZLPZFBkZaXUMnIfD4eBnAx8umQEAAONRiAAAgPEoRAAAwHgUIgAAYDwKEQAAMB6FCAAAGI9CBAAAjEchAgAAxqMQAQAA41GIAACA8UK+EM2ZM0c2m81vufbaa33bq6qqlJWVpfbt2+uyyy7TqFGjVFJS4rePo0ePatiwYWrTpo1iY2P18MMPq66urqkPBQAAhKhm8Syzbt26aevWrb7XrVr9v9gPPvigNm7cqL/+9a+KiopSdna2brvtNu3cuVOSVF9fr2HDhsnlcmnXrl06ceKExo0bp9atW+vJJ59s8mMBAAChp1kUolatWsnlcp2zvqKiQsuXL9eqVas0ePBgSdIrr7yiLl266MMPP9SNN96oLVu26LPPPtPWrVsVFxennj17av78+Zo5c6bmzJmjiIiIpj4cAAAQYkL+kpkk/eMf/1BCQoJ+/vOfa8yYMTp69KgkqbCwULW1tUpLS/ONvfbaa3XllVcqPz9fkpSfn6/k5GTFxcX5xmRkZMjtduvAgQMXfM/q6mq53W6/BQAAtEwhX4hSUlK0YsUKbd68WS+99JKOHDmi/v376+TJkyouLlZERISio6P9vicuLk7FxcWSpOLiYr8ydGb7mW0XsmDBAkVFRfmWxMTE4B4YAAAIGSF/ySwzM9P3dffu3ZWSkqKkpCStWbNGkZGRjfa+s2bN0vTp032v3W43pQgAgBYq5M8Q/VB0
dLR+8Ytf6IsvvpDL5VJNTY3Ky8v9xpSUlPjmHLlcrnPuOjvz+nzzks6w2+1yOp1+CwAAaJmaXSGqrKzU4cOHFR8fr969e6t169bKy8vzbT906JCOHj2q1NRUSVJqaqr27dun0tJS35jc3Fw5nU517dq1yfMDAIDQE/KXzB566CENHz5cSUlJOn78uB5//HGFh4fr7rvvVlRUlCZNmqTp06crJiZGTqdTU6dOVWpqqm688UZJUnp6urp27aqxY8dq4cKFKi4u1qOPPqqsrCzZ7XaLjw4AAISCkC9E//rXv3T33Xfrm2++UceOHXXTTTfpww8/VMeOHSVJixYtUlhYmEaNGqXq6mplZGToxRdf9H1/eHi4NmzYoP/8z/9Uamqq2rZtq/Hjx2vevHlWHRIAAAgxIV+I3njjjYtudzgcWrx4sRYvXnzBMUlJSdq0aVOwowEAgBai2c0hAgAACDYKEQAAMB6FCAAAGI9CBAAAjBfyk6rRfHi9XlVVVVkdo8HOPoaWcDwOh0M2m83qGAAQ0ihECJqqqiq/R620BCNHjrQ6QoPl5OQ06mNuAKAl4JIZAAAwHmeIEDQOh0M5OTlWx2gwr9er6upqSd890665X25yOBxWRwCAkEchQtDYbLYWc2mmTZs2VkcAADQhLpkBAADjUYgAAIDxKEQAAMB4FCIAAGA8ChEAADAehQgAABiPQgQAAIxHIQIAAMajEAEAAONRiAAAgPEoRAAAwHgUIgAAYDwKEQAAMB6FCAAAGK+V1QEAAKHP6/WqqqrK6hgNdvYxtITjcTgcstlsVsdoEShEAIAfVVVVpczMTKtjBNXIkSOtjtBgOTk5ioyMtDpGi8AlMwAAYDzOEAEAfpTD4VBOTo7VMRrM6/WqurpakmS325v95SaHw2F1hBaDQgQA+FE2m63FXJpp06aN1REQgrhkBgAAjEchAgAAxqMQAQAA41GIAACA8ShEAADAeBQiAABgPAoRAAAwHoUIAAAYj0IEAACMRyECAADGoxABAADjUYgAAIDxKEQAAMB4PO3+Enm9XkmS2+22OAkAALhUZ/5un/k7fiEUokt08uRJSVJiYqLFSQAAwE918uRJRUVFXXC7zftjlQmSJI/Ho+PHj6tdu3ay2WxWx0Ejc7vdSkxM1LFjx+R0Oq2OAyCI+P02i9fr1cmTJ5WQkKCwsAvPFOIM0SUKCwvTFVdcYXUMNDGn08k/mEALxe+3OS52ZugMJlUDAADjUYgAAIDxKETAedjtdj3++OOy2+1WRwEQZPx+43yYVA0AAIzHGSIAAGA8ChEAADAehQgAABiPQgQAAIxHIQJ+YPHixbrqqqvkcDiUkpKijz76yOpIAIJg+/btGj58uBISEmSz2bRu3TqrIyGEUIiAs6xevVrTp0/X448/rt27d6tHjx7KyMhQaWmp1dEANNCpU6fUo0cPLV682OooCEHcdg+cJSUlRX379tULL7wg6btn2CUmJmrq1Kn6/e9/b3E6AMFis9m0du1ajRgxwuooCBGcIQK+V1NTo8LCQqWlpfnWhYWFKS0tTfn5+RYmAwA0NgoR8L1///vfqq+vV1xcnN/6uLg4FRcXW5QKANAUKEQAAMB4FCLgex06dFB4eLhKSkr81peUlMjlclmUCgDQFChEwPciIiLUu3dv5eXl+dZ5PB7l5eUpNTXVwmQAgMbWyuoAQCiZPn26xo8frz59+uiGG27Qs88+q1OnTmnChAlWRwPQQJWVlfriiy98r48cOaKioiLFxMToyiuvtDAZQgG33QM/8MILL+hPf/qTiouL1bNnTz333HNKSUmxOhaABtq2bZsGDRp0zvrx48drxYoVTR8IIYVCBAAAjMccIgAAYDwKEQAAMB6FCAAAGI9CBAAAjEchAgAAxqMQAQAA41GIAACA8ShEAADAeBQiAABgPAoRAAAwHoUIAC5RTU2N1REANBIKEYBm780331RycrIiIyPVvn17paWl6dSpU5Kkl19+Wd26
dZPdbld8fLyys7N933f06FHdeuutuuyyy+R0OnXnnXeqpKTEt33OnDnq2bOnli1bpk6dOsnhcEiSysvLdd9996ljx45yOp0aPHiwPv3006Y9aABBRSEC0KydOHFCd999tyZOnKiDBw9q27Ztuu222+T1evXSSy8pKytLkydP1r59+/T222+rc+fOkiSPx6Nbb71VZWVlev/995Wbm6svv/xSd911l9/+v/jiC/3v//6v3nrrLRUVFUmS7rjjDpWWlionJ0eFhYXq1auXhgwZorKysqY+fABBwtPuATRru3fvVu/evfXPf/5TSUlJftt+9rOfacKECXriiSfO+b7c3FxlZmbqyJEjSkxMlCR99tln6tatmz766CP17dtXc+bM0ZNPPqmvvvpKHTt2lCR98MEHGjZsmEpLS2W3233769y5s2bMmKHJkyc34tECaCytrA4AAA3Ro0cPDRkyRMnJycrIyFB6erpuv/121dbW6vjx4xoyZMh5v+/gwYNKTEz0lSFJ6tq1q6Kjo3Xw4EH17dtXkpSUlOQrQ5L06aefqrKyUu3bt/fb3+nTp3X48OFGOEIATYFCBKBZCw8PV25urnbt2qUtW7bo+eef1yOPPKK8vLyg7L9t27Z+rysrKxUfH69t27adMzY6Ojoo7wmg6VGIADR7NptN/fr1U79+/TR79mwlJSUpNzdXV111lfLy8jRo0KBzvqdLly46duyYjh075nfJrLy8XF27dr3ge/Xq1UvFxcVq1aqVrrrqqsY6JABNjEIEoFkrKChQXl6e0tPTFRsbq4KCAn399dfq0qWL5syZoylTpig2NlaZmZk6efKkdu7cqalTpyotLU3JyckaM2aMnn32WdXV1em//uu/9Ktf/Up9+vS54PulpaUpNTVVI0aM0MKFC/WLX/xCx48f18aNGzVy5MiLfi+A0EUhAtCsOZ1Obd++Xc8++6zcbreSkpL09NNPKzMzU5JUVVWlRYsW6aGHHlKHDh10++23S/rurNLf/vY3TZ06VQMGDFBYWJiGDh2q559//qLvZ7PZtGnTJj3yyCOaMGGCvv76a7lcLg0YMEBxcXGNfrwAGgd3mQEAAOPxOUQAAMB4FCIAAGA8ChEAADAehQgAABiPQgQAAIxHIQIAAMajEAEAAONRiAAAgPEoRAAAwHgUIgAAYDwKEQAAMN7/D20ly0xCaChRAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 640x480 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "sns.boxplot(x='score', y='max_chunk_len', data=pred_df)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "9ddef3c2-0b02-4cf5-b595-0d83de62f6d6",
   "metadata": {},
   "source": [
    "从两张图可以比较明显地看出来，回答错误的问题，对应的上下文，无论是平均还是最长的那个，总体要大于回答正确的  \n",
    "从平均长度角度来看，回答错误的上下文基本上在700以上，这个长度对中文而言，基本上接近bge large v1.5的最大长度了，检索效果不好是可以遇见的"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f4dac147-e044-4648-9ee1-3ae5cd4a2a69",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.9"
  },
  "papermill": {
   "default_parameters": {},
   "duration": 1058.563616,
   "end_time": "2024-11-23T14:46:37.625874",
   "environment_variables": {},
   "exception": null,
   "input_path": "13_contextual_embeddings.ipynb",
   "output_path": "run_13_contextual_embeddings.ipynb",
   "parameters": {},
   "start_time": "2024-11-23T14:28:59.062258",
   "version": "2.6.0"
  },
  "widgets": {
   "application/vnd.jupyter.widget-state+json": {
    "state": {
     "0cd8c168767249f2a5fa412173f6e751": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "FloatProgressModel",
      "state": {
       "_dom_classes": [],
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "FloatProgressModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/controls",
       "_view_module_version": "2.0.0",
       "_view_name": "ProgressView",
       "bar_style": "success",
       "description": "",
       "description_allow_html": false,
       "layout": "IPY_MODEL_5ce1d1d9d86c40d9839877ff95734491",
       "max": 100,
       "min": 0,
       "orientation": "horizontal",
       "style": "IPY_MODEL_231702cf4d79477f9d5548665a1b18fe",
       "tabbable": null,
       "tooltip": null,
       "value": 100
      }
     },
     "2133bb8d85d34b8db112b4408ad60320": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "HTMLStyleModel",
      "state": {
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "HTMLStyleModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "StyleView",
       "background": null,
       "description_width": "",
       "font_size": null,
       "text_color": null
      }
     },
     "231702cf4d79477f9d5548665a1b18fe": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "ProgressStyleModel",
      "state": {
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "ProgressStyleModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "StyleView",
       "bar_color": null,
       "description_width": ""
      }
     },
     "23b1ad9c0f9c46c888da66e85c90eb84": {
      "model_module": "@jupyter-widgets/base",
      "model_module_version": "2.0.0",
      "model_name": "LayoutModel",
      "state": {
       "_model_module": "@jupyter-widgets/base",
       "_model_module_version": "2.0.0",
       "_model_name": "LayoutModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "LayoutView",
       "align_content": null,
       "align_items": null,
       "align_self": null,
       "border_bottom": null,
       "border_left": null,
       "border_right": null,
       "border_top": null,
       "bottom": null,
       "display": null,
       "flex": null,
       "flex_flow": null,
       "grid_area": null,
       "grid_auto_columns": null,
       "grid_auto_flow": null,
       "grid_auto_rows": null,
       "grid_column": null,
       "grid_gap": null,
       "grid_row": null,
       "grid_template_areas": null,
       "grid_template_columns": null,
       "grid_template_rows": null,
       "height": null,
       "justify_content": null,
       "justify_items": null,
       "left": null,
       "margin": null,
       "max_height": null,
       "max_width": null,
       "min_height": null,
       "min_width": null,
       "object_fit": null,
       "object_position": null,
       "order": null,
       "overflow": null,
       "padding": null,
       "right": null,
       "top": null,
       "visibility": null,
       "width": null
      }
     },
     "24e6eadc3dc940ecabf30dd1a3c6d1f3": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "FloatProgressModel",
      "state": {
       "_dom_classes": [],
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "FloatProgressModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/controls",
       "_view_module_version": "2.0.0",
       "_view_name": "ProgressView",
       "bar_style": "success",
       "description": "",
       "description_allow_html": false,
       "layout": "IPY_MODEL_fa4bddf2c33241b5bf918054518f128f",
       "max": 52,
       "min": 0,
       "orientation": "horizontal",
       "style": "IPY_MODEL_edc33e82be8f41eba6a18a0ef074ab7a",
       "tabbable": null,
       "tooltip": null,
       "value": 52
      }
     },
     "2f60367b1c8941e2bf71661c33969ae8": {
      "model_module": "@jupyter-widgets/base",
      "model_module_version": "2.0.0",
      "model_name": "LayoutModel",
      "state": {
       "_model_module": "@jupyter-widgets/base",
       "_model_module_version": "2.0.0",
       "_model_name": "LayoutModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "LayoutView",
       "align_content": null,
       "align_items": null,
       "align_self": null,
       "border_bottom": null,
       "border_left": null,
       "border_right": null,
       "border_top": null,
       "bottom": null,
       "display": null,
       "flex": null,
       "flex_flow": null,
       "grid_area": null,
       "grid_auto_columns": null,
       "grid_auto_flow": null,
       "grid_auto_rows": null,
       "grid_column": null,
       "grid_gap": null,
       "grid_row": null,
       "grid_template_areas": null,
       "grid_template_columns": null,
       "grid_template_rows": null,
       "height": null,
       "justify_content": null,
       "justify_items": null,
       "left": null,
       "margin": null,
       "max_height": null,
       "max_width": null,
       "min_height": null,
       "min_width": null,
       "object_fit": null,
       "object_position": null,
       "order": null,
       "overflow": null,
       "padding": null,
       "right": null,
       "top": null,
       "visibility": null,
       "width": null
      }
     },
     "3865f25c78aa46f29a25d807205281c3": {
      "model_module": "@jupyter-widgets/base",
      "model_module_version": "2.0.0",
      "model_name": "LayoutModel",
      "state": {
       "_model_module": "@jupyter-widgets/base",
       "_model_module_version": "2.0.0",
       "_model_name": "LayoutModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "LayoutView",
       "align_content": null,
       "align_items": null,
       "align_self": null,
       "border_bottom": null,
       "border_left": null,
       "border_right": null,
       "border_top": null,
       "bottom": null,
       "display": null,
       "flex": null,
       "flex_flow": null,
       "grid_area": null,
       "grid_auto_columns": null,
       "grid_auto_flow": null,
       "grid_auto_rows": null,
       "grid_column": null,
       "grid_gap": null,
       "grid_row": null,
       "grid_template_areas": null,
       "grid_template_columns": null,
       "grid_template_rows": null,
       "height": null,
       "justify_content": null,
       "justify_items": null,
       "left": null,
       "margin": null,
       "max_height": null,
       "max_width": null,
       "min_height": null,
       "min_width": null,
       "object_fit": null,
       "object_position": null,
       "order": null,
       "overflow": null,
       "padding": null,
       "right": null,
       "top": null,
       "visibility": null,
       "width": null
      }
     },
     "3d0b06deaa654b989eece8cde06fa0f8": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "HTMLStyleModel",
      "state": {
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "HTMLStyleModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "StyleView",
       "background": null,
       "description_width": "",
       "font_size": null,
       "text_color": null
      }
     },
     "3f8ceda83287475b97608e42f5f6782f": {
      "model_module": "@jupyter-widgets/base",
      "model_module_version": "2.0.0",
      "model_name": "LayoutModel",
      "state": {
       "_model_module": "@jupyter-widgets/base",
       "_model_module_version": "2.0.0",
       "_model_name": "LayoutModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "LayoutView",
       "align_content": null,
       "align_items": null,
       "align_self": null,
       "border_bottom": null,
       "border_left": null,
       "border_right": null,
       "border_top": null,
       "bottom": null,
       "display": null,
       "flex": null,
       "flex_flow": null,
       "grid_area": null,
       "grid_auto_columns": null,
       "grid_auto_flow": null,
       "grid_auto_rows": null,
       "grid_column": null,
       "grid_gap": null,
       "grid_row": null,
       "grid_template_areas": null,
       "grid_template_columns": null,
       "grid_template_rows": null,
       "height": null,
       "justify_content": null,
       "justify_items": null,
       "left": null,
       "margin": null,
       "max_height": null,
       "max_width": null,
       "min_height": null,
       "min_width": null,
       "object_fit": null,
       "object_position": null,
       "order": null,
       "overflow": null,
       "padding": null,
       "right": null,
       "top": null,
       "visibility": null,
       "width": null
      }
     },
     "4881e496f1c84fe29ce9ebebaddfb3c2": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "HBoxModel",
      "state": {
       "_dom_classes": [],
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "HBoxModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/controls",
       "_view_module_version": "2.0.0",
       "_view_name": "HBoxView",
       "box_style": "",
       "children": [
        "IPY_MODEL_bd096d5d219a467786a85cfe1613fedd",
        "IPY_MODEL_24e6eadc3dc940ecabf30dd1a3c6d1f3",
        "IPY_MODEL_bc2b8104b4244d8cacedeb95e800d91c"
       ],
       "layout": "IPY_MODEL_6b9a8e43c1c342dba500a14e7149b600",
       "tabbable": null,
       "tooltip": null
      }
     },
     "5ce1d1d9d86c40d9839877ff95734491": {
      "model_module": "@jupyter-widgets/base",
      "model_module_version": "2.0.0",
      "model_name": "LayoutModel",
      "state": {
       "_model_module": "@jupyter-widgets/base",
       "_model_module_version": "2.0.0",
       "_model_name": "LayoutModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "LayoutView",
       "align_content": null,
       "align_items": null,
       "align_self": null,
       "border_bottom": null,
       "border_left": null,
       "border_right": null,
       "border_top": null,
       "bottom": null,
       "display": null,
       "flex": null,
       "flex_flow": null,
       "grid_area": null,
       "grid_auto_columns": null,
       "grid_auto_flow": null,
       "grid_auto_rows": null,
       "grid_column": null,
       "grid_gap": null,
       "grid_row": null,
       "grid_template_areas": null,
       "grid_template_columns": null,
       "grid_template_rows": null,
       "height": null,
       "justify_content": null,
       "justify_items": null,
       "left": null,
       "margin": null,
       "max_height": null,
       "max_width": null,
       "min_height": null,
       "min_width": null,
       "object_fit": null,
       "object_position": null,
       "order": null,
       "overflow": null,
       "padding": null,
       "right": null,
       "top": null,
       "visibility": null,
       "width": null
      }
     },
     "5ddb08be5cc64c9ab40a1d62a21763a5": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "HTMLModel",
      "state": {
       "_dom_classes": [],
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "HTMLModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/controls",
       "_view_module_version": "2.0.0",
       "_view_name": "HTMLView",
       "description": "",
       "description_allow_html": false,
       "layout": "IPY_MODEL_86283159049d48b1adcfb2de2d404d4d",
       "placeholder": "​",
       "style": "IPY_MODEL_2133bb8d85d34b8db112b4408ad60320",
       "tabbable": null,
       "tooltip": null,
       "value": " 100/100 [08:34&lt;00:00, 10.01s/it]"
      }
     },
     "5ef9d83ccad1471f85335900a24a8553": {
      "model_module": "@jupyter-widgets/base",
      "model_module_version": "2.0.0",
      "model_name": "LayoutModel",
      "state": {
       "_model_module": "@jupyter-widgets/base",
       "_model_module_version": "2.0.0",
       "_model_name": "LayoutModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "LayoutView",
       "align_content": null,
       "align_items": null,
       "align_self": null,
       "border_bottom": null,
       "border_left": null,
       "border_right": null,
       "border_top": null,
       "bottom": null,
       "display": null,
       "flex": null,
       "flex_flow": null,
       "grid_area": null,
       "grid_auto_columns": null,
       "grid_auto_flow": null,
       "grid_auto_rows": null,
       "grid_column": null,
       "grid_gap": null,
       "grid_row": null,
       "grid_template_areas": null,
       "grid_template_columns": null,
       "grid_template_rows": null,
       "height": null,
       "justify_content": null,
       "justify_items": null,
       "left": null,
       "margin": null,
       "max_height": null,
       "max_width": null,
       "min_height": null,
       "min_width": null,
       "object_fit": null,
       "object_position": null,
       "order": null,
       "overflow": null,
       "padding": null,
       "right": null,
       "top": null,
       "visibility": null,
       "width": null
      }
     },
     "6b9a8e43c1c342dba500a14e7149b600": {
      "model_module": "@jupyter-widgets/base",
      "model_module_version": "2.0.0",
      "model_name": "LayoutModel",
      "state": {
       "_model_module": "@jupyter-widgets/base",
       "_model_module_version": "2.0.0",
       "_model_name": "LayoutModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "LayoutView",
       "align_content": null,
       "align_items": null,
       "align_self": null,
       "border_bottom": null,
       "border_left": null,
       "border_right": null,
       "border_top": null,
       "bottom": null,
       "display": null,
       "flex": null,
       "flex_flow": null,
       "grid_area": null,
       "grid_auto_columns": null,
       "grid_auto_flow": null,
       "grid_auto_rows": null,
       "grid_column": null,
       "grid_gap": null,
       "grid_row": null,
       "grid_template_areas": null,
       "grid_template_columns": null,
       "grid_template_rows": null,
       "height": null,
       "justify_content": null,
       "justify_items": null,
       "left": null,
       "margin": null,
       "max_height": null,
       "max_width": null,
       "min_height": null,
       "min_width": null,
       "object_fit": null,
       "object_position": null,
       "order": null,
       "overflow": null,
       "padding": null,
       "right": null,
       "top": null,
       "visibility": null,
       "width": null
      }
     },
     "816a079a8c804fbfa9b9a74f941abea8": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "HBoxModel",
      "state": {
       "_dom_classes": [],
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "HBoxModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/controls",
       "_view_module_version": "2.0.0",
       "_view_name": "HBoxView",
       "box_style": "",
       "children": [
        "IPY_MODEL_bcc69ec5db1b4aab977807284c9290e7",
        "IPY_MODEL_0cd8c168767249f2a5fa412173f6e751",
        "IPY_MODEL_5ddb08be5cc64c9ab40a1d62a21763a5"
       ],
       "layout": "IPY_MODEL_d1178c6858284f788a80b5f2a14fd0b7",
       "tabbable": null,
       "tooltip": null
      }
     },
     "86283159049d48b1adcfb2de2d404d4d": {
      "model_module": "@jupyter-widgets/base",
      "model_module_version": "2.0.0",
      "model_name": "LayoutModel",
      "state": {
       "_model_module": "@jupyter-widgets/base",
       "_model_module_version": "2.0.0",
       "_model_name": "LayoutModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "LayoutView",
       "align_content": null,
       "align_items": null,
       "align_self": null,
       "border_bottom": null,
       "border_left": null,
       "border_right": null,
       "border_top": null,
       "bottom": null,
       "display": null,
       "flex": null,
       "flex_flow": null,
       "grid_area": null,
       "grid_auto_columns": null,
       "grid_auto_flow": null,
       "grid_auto_rows": null,
       "grid_column": null,
       "grid_gap": null,
       "grid_row": null,
       "grid_template_areas": null,
       "grid_template_columns": null,
       "grid_template_rows": null,
       "height": null,
       "justify_content": null,
       "justify_items": null,
       "left": null,
       "margin": null,
       "max_height": null,
       "max_width": null,
       "min_height": null,
       "min_width": null,
       "object_fit": null,
       "object_position": null,
       "order": null,
       "overflow": null,
       "padding": null,
       "right": null,
       "top": null,
       "visibility": null,
       "width": null
      }
     },
     "8ff8262c56604119883f4a5f13bb74ab": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "HTMLModel",
      "state": {
       "_dom_classes": [],
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "HTMLModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/controls",
       "_view_module_version": "2.0.0",
       "_view_name": "HTMLView",
       "description": "",
       "description_allow_html": false,
       "layout": "IPY_MODEL_5ef9d83ccad1471f85335900a24a8553",
       "placeholder": "​",
       "style": "IPY_MODEL_e89e77133c344fc48c1d62f5a607ec93",
       "tabbable": null,
       "tooltip": null,
       "value": " 8/8 [00:18&lt;00:00,  2.27s/it]"
      }
     },
     "9189a076554543aaa6f5ee04e40dbe1b": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "HTMLStyleModel",
      "state": {
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "HTMLStyleModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "StyleView",
       "background": null,
       "description_width": "",
       "font_size": null,
       "text_color": null
      }
     },
     "988e6697a2af486fadeaf0b84347b565": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "HBoxModel",
      "state": {
       "_dom_classes": [],
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "HBoxModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/controls",
       "_view_module_version": "2.0.0",
       "_view_name": "HBoxView",
       "box_style": "",
       "children": [
        "IPY_MODEL_e1aae4c55cb64f379e74f15357275628",
        "IPY_MODEL_fd9e23198ca1489a9773fda3510bf857",
        "IPY_MODEL_8ff8262c56604119883f4a5f13bb74ab"
       ],
       "layout": "IPY_MODEL_d2ee15001d2244529f7e47d3333c0f8e",
       "tabbable": null,
       "tooltip": null
      }
     },
     "9fc7d91f94a94933bde5ba80e64587de": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "HTMLStyleModel",
      "state": {
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "HTMLStyleModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "StyleView",
       "background": null,
       "description_width": "",
       "font_size": null,
       "text_color": null
      }
     },
     "a7d240a289084bdfba4724c0efd5ab07": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "ProgressStyleModel",
      "state": {
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "ProgressStyleModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "StyleView",
       "bar_color": null,
       "description_width": ""
      }
     },
     "bc2b8104b4244d8cacedeb95e800d91c": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "HTMLModel",
      "state": {
       "_dom_classes": [],
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "HTMLModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/controls",
       "_view_module_version": "2.0.0",
       "_view_name": "HTMLView",
       "description": "",
       "description_allow_html": false,
       "layout": "IPY_MODEL_2f60367b1c8941e2bf71661c33969ae8",
       "placeholder": "​",
       "style": "IPY_MODEL_9fc7d91f94a94933bde5ba80e64587de",
       "tabbable": null,
       "tooltip": null,
       "value": " 52/52 [04:26&lt;00:00,  4.22s/it]"
      }
     },
     "bcc69ec5db1b4aab977807284c9290e7": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "HTMLModel",
      "state": {
       "_dom_classes": [],
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "HTMLModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/controls",
       "_view_module_version": "2.0.0",
       "_view_name": "HTMLView",
       "description": "",
       "description_allow_html": false,
       "layout": "IPY_MODEL_3f8ceda83287475b97608e42f5f6782f",
       "placeholder": "​",
       "style": "IPY_MODEL_3d0b06deaa654b989eece8cde06fa0f8",
       "tabbable": null,
       "tooltip": null,
       "value": "100%"
      }
     },
     "bd096d5d219a467786a85cfe1613fedd": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "HTMLModel",
      "state": {
       "_dom_classes": [],
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "HTMLModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/controls",
       "_view_module_version": "2.0.0",
       "_view_name": "HTMLView",
       "description": "",
       "description_allow_html": false,
       "layout": "IPY_MODEL_3865f25c78aa46f29a25d807205281c3",
       "placeholder": "​",
       "style": "IPY_MODEL_9189a076554543aaa6f5ee04e40dbe1b",
       "tabbable": null,
       "tooltip": null,
       "value": "100%"
      }
     },
     "cc3ed8dc4a5c43aca7b62d904865b2fa": {
      "model_module": "@jupyter-widgets/base",
      "model_module_version": "2.0.0",
      "model_name": "LayoutModel",
      "state": {
       "_model_module": "@jupyter-widgets/base",
       "_model_module_version": "2.0.0",
       "_model_name": "LayoutModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "LayoutView",
       "align_content": null,
       "align_items": null,
       "align_self": null,
       "border_bottom": null,
       "border_left": null,
       "border_right": null,
       "border_top": null,
       "bottom": null,
       "display": null,
       "flex": null,
       "flex_flow": null,
       "grid_area": null,
       "grid_auto_columns": null,
       "grid_auto_flow": null,
       "grid_auto_rows": null,
       "grid_column": null,
       "grid_gap": null,
       "grid_row": null,
       "grid_template_areas": null,
       "grid_template_columns": null,
       "grid_template_rows": null,
       "height": null,
       "justify_content": null,
       "justify_items": null,
       "left": null,
       "margin": null,
       "max_height": null,
       "max_width": null,
       "min_height": null,
       "min_width": null,
       "object_fit": null,
       "object_position": null,
       "order": null,
       "overflow": null,
       "padding": null,
       "right": null,
       "top": null,
       "visibility": null,
       "width": null
      }
     },
     "cf68b6fe24964ce792aa63827489cb97": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "HTMLStyleModel",
      "state": {
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "HTMLStyleModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "StyleView",
       "background": null,
       "description_width": "",
       "font_size": null,
       "text_color": null
      }
     },
     "d1178c6858284f788a80b5f2a14fd0b7": {
      "model_module": "@jupyter-widgets/base",
      "model_module_version": "2.0.0",
      "model_name": "LayoutModel",
      "state": {
       "_model_module": "@jupyter-widgets/base",
       "_model_module_version": "2.0.0",
       "_model_name": "LayoutModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "LayoutView",
       "align_content": null,
       "align_items": null,
       "align_self": null,
       "border_bottom": null,
       "border_left": null,
       "border_right": null,
       "border_top": null,
       "bottom": null,
       "display": null,
       "flex": null,
       "flex_flow": null,
       "grid_area": null,
       "grid_auto_columns": null,
       "grid_auto_flow": null,
       "grid_auto_rows": null,
       "grid_column": null,
       "grid_gap": null,
       "grid_row": null,
       "grid_template_areas": null,
       "grid_template_columns": null,
       "grid_template_rows": null,
       "height": null,
       "justify_content": null,
       "justify_items": null,
       "left": null,
       "margin": null,
       "max_height": null,
       "max_width": null,
       "min_height": null,
       "min_width": null,
       "object_fit": null,
       "object_position": null,
       "order": null,
       "overflow": null,
       "padding": null,
       "right": null,
       "top": null,
       "visibility": null,
       "width": null
      }
     },
     "d2ee15001d2244529f7e47d3333c0f8e": {
      "model_module": "@jupyter-widgets/base",
      "model_module_version": "2.0.0",
      "model_name": "LayoutModel",
      "state": {
       "_model_module": "@jupyter-widgets/base",
       "_model_module_version": "2.0.0",
       "_model_name": "LayoutModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "LayoutView",
       "align_content": null,
       "align_items": null,
       "align_self": null,
       "border_bottom": null,
       "border_left": null,
       "border_right": null,
       "border_top": null,
       "bottom": null,
       "display": null,
       "flex": null,
       "flex_flow": null,
       "grid_area": null,
       "grid_auto_columns": null,
       "grid_auto_flow": null,
       "grid_auto_rows": null,
       "grid_column": null,
       "grid_gap": null,
       "grid_row": null,
       "grid_template_areas": null,
       "grid_template_columns": null,
       "grid_template_rows": null,
       "height": null,
       "justify_content": null,
       "justify_items": null,
       "left": null,
       "margin": null,
       "max_height": null,
       "max_width": null,
       "min_height": null,
       "min_width": null,
       "object_fit": null,
       "object_position": null,
       "order": null,
       "overflow": null,
       "padding": null,
       "right": null,
       "top": null,
       "visibility": null,
       "width": null
      }
     },
     "e1aae4c55cb64f379e74f15357275628": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "HTMLModel",
      "state": {
       "_dom_classes": [],
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "HTMLModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/controls",
       "_view_module_version": "2.0.0",
       "_view_name": "HTMLView",
       "description": "",
       "description_allow_html": false,
       "layout": "IPY_MODEL_23b1ad9c0f9c46c888da66e85c90eb84",
       "placeholder": "​",
       "style": "IPY_MODEL_cf68b6fe24964ce792aa63827489cb97",
       "tabbable": null,
       "tooltip": null,
       "value": "100%"
      }
     },
     "e89e77133c344fc48c1d62f5a607ec93": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "HTMLStyleModel",
      "state": {
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "HTMLStyleModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "StyleView",
       "background": null,
       "description_width": "",
       "font_size": null,
       "text_color": null
      }
     },
     "edc33e82be8f41eba6a18a0ef074ab7a": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "ProgressStyleModel",
      "state": {
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "ProgressStyleModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "StyleView",
       "bar_color": null,
       "description_width": ""
      }
     },
     "fa4bddf2c33241b5bf918054518f128f": {
      "model_module": "@jupyter-widgets/base",
      "model_module_version": "2.0.0",
      "model_name": "LayoutModel",
      "state": {
       "_model_module": "@jupyter-widgets/base",
       "_model_module_version": "2.0.0",
       "_model_name": "LayoutModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "LayoutView",
       "align_content": null,
       "align_items": null,
       "align_self": null,
       "border_bottom": null,
       "border_left": null,
       "border_right": null,
       "border_top": null,
       "bottom": null,
       "display": null,
       "flex": null,
       "flex_flow": null,
       "grid_area": null,
       "grid_auto_columns": null,
       "grid_auto_flow": null,
       "grid_auto_rows": null,
       "grid_column": null,
       "grid_gap": null,
       "grid_row": null,
       "grid_template_areas": null,
       "grid_template_columns": null,
       "grid_template_rows": null,
       "height": null,
       "justify_content": null,
       "justify_items": null,
       "left": null,
       "margin": null,
       "max_height": null,
       "max_width": null,
       "min_height": null,
       "min_width": null,
       "object_fit": null,
       "object_position": null,
       "order": null,
       "overflow": null,
       "padding": null,
       "right": null,
       "top": null,
       "visibility": null,
       "width": null
      }
     },
     "fd9e23198ca1489a9773fda3510bf857": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "FloatProgressModel",
      "state": {
       "_dom_classes": [],
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "FloatProgressModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/controls",
       "_view_module_version": "2.0.0",
       "_view_name": "ProgressView",
       "bar_style": "success",
       "description": "",
       "description_allow_html": false,
       "layout": "IPY_MODEL_cc3ed8dc4a5c43aca7b62d904865b2fa",
       "max": 8,
       "min": 0,
       "orientation": "horizontal",
       "style": "IPY_MODEL_a7d240a289084bdfba4724c0efd5ab07",
       "tabbable": null,
       "tooltip": null,
       "value": 8
      }
     }
    },
    "version_major": 2,
    "version_minor": 0
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
